Refactor Code samples; Test code samples (#5036)
* Refactor code samples * Test docstrings * Style * Tokenization examples * Run rust of tests * First step to testing source docs * Style and BART comment * Test the remainder of the code samples * Style * let to const * Formatting fixes * Ready for merge * Fix fixture + Style * Fix last tests * Update docs/source/quicktour.rst Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Addressing @sgugger's comments + Fix MobileBERT in TF Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -10,3 +10,7 @@
|
||||
.highlight .kn, .highlight .nv, .highlight .s2, .highlight .ow {
|
||||
color: #6670FF;
|
||||
}
|
||||
|
||||
.highlight .gp {
|
||||
color: #FB8D68;
|
||||
}
|
||||
@@ -44,6 +44,7 @@
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
justify-content: flex-end;
|
||||
margin-right: 30px;
|
||||
}
|
||||
|
||||
.framework-selector > button {
|
||||
@@ -60,6 +61,12 @@
|
||||
padding: 5px;
|
||||
}
|
||||
|
||||
/* Copy button */
|
||||
|
||||
a.copybtn {
|
||||
margin: 3px;
|
||||
}
|
||||
|
||||
/* The literal code blocks */
|
||||
.rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal {
|
||||
color: #6670FF;
|
||||
|
||||
@@ -157,6 +157,8 @@ function platformToggle() {
|
||||
const codeBlocks = Array.from(document.getElementsByClassName("highlight"));
|
||||
const pytorchIdentifier = "## PYTORCH CODE";
|
||||
const tensorflowIdentifier = "## TENSORFLOW CODE";
|
||||
|
||||
const promptSpanIdentifier = `<span class="gp">>>> </span>`
|
||||
const pytorchSpanIdentifier = `<span class="c1">${pytorchIdentifier}</span>`;
|
||||
const tensorflowSpanIdentifier = `<span class="c1">${tensorflowIdentifier}</span>`;
|
||||
|
||||
@@ -169,10 +171,22 @@ function platformToggle() {
|
||||
let tensorflowSpans;
|
||||
|
||||
if(pytorchSpanPosition < tensorflowSpanPosition){
|
||||
pytorchSpans = spans.slice(pytorchSpanPosition + pytorchSpanIdentifier.length + 1, tensorflowSpanPosition);
|
||||
const isPrompt = spans.slice(
|
||||
spans.indexOf(tensorflowSpanIdentifier) - promptSpanIdentifier.length,
|
||||
spans.indexOf(tensorflowSpanIdentifier)
|
||||
) == promptSpanIdentifier;
|
||||
const finalTensorflowSpanPosition = isPrompt ? tensorflowSpanPosition - promptSpanIdentifier.length : tensorflowSpanPosition;
|
||||
|
||||
pytorchSpans = spans.slice(pytorchSpanPosition + pytorchSpanIdentifier.length + 1, finalTensorflowSpanPosition);
|
||||
tensorflowSpans = spans.slice(tensorflowSpanPosition + tensorflowSpanIdentifier.length + 1, spans.length);
|
||||
}else{
|
||||
tensorflowSpans = spans.slice(tensorflowSpanPosition + tensorflowSpanIdentifier.length + 1, pytorchSpanPosition);
|
||||
const isPrompt = spans.slice(
|
||||
spans.indexOf(pytorchSpanIdentifier) - promptSpanIdentifier.length,
|
||||
spans.indexOf(pytorchSpanIdentifier)
|
||||
) == promptSpanIdentifier;
|
||||
const finalPytorchSpanPosition = isPrompt ? pytorchSpanPosition - promptSpanIdentifier.length : pytorchSpanPosition;
|
||||
|
||||
tensorflowSpans = spans.slice(tensorflowSpanPosition + tensorflowSpanIdentifier.length + 1, finalPytorchSpanPosition);
|
||||
pytorchSpans = spans.slice(pytorchSpanPosition + pytorchSpanIdentifier.length + 1, spans.length);
|
||||
}
|
||||
|
||||
|
||||
@@ -44,7 +44,8 @@ extensions = [
|
||||
'sphinx.ext.napoleon',
|
||||
'recommonmark',
|
||||
'sphinx.ext.viewcode',
|
||||
'sphinx_markdown_tables'
|
||||
'sphinx_markdown_tables',
|
||||
'sphinx_copybutton'
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
@@ -74,6 +75,8 @@ exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store']
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = None
|
||||
|
||||
# Remove the prompt when copying examples
|
||||
copybutton_prompt_text = ">>> "
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
|
||||
@@ -45,17 +45,16 @@ tokenizer, which is a `WordPiece <https://arxiv.org/pdf/1609.08144.pdf>`__ token
|
||||
|
||||
::
|
||||
|
||||
from transformers import BertTokenizer
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
>>> from transformers import BertTokenizer
|
||||
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
|
||||
sequence = "A Titan RTX has 24GB of VRAM"
|
||||
>>> sequence = "A Titan RTX has 24GB of VRAM"
|
||||
|
||||
The tokenizer takes care of splitting the sequence into tokens available in the tokenizer vocabulary.
|
||||
|
||||
::
|
||||
|
||||
tokenized_sequence = tokenizer.tokenize(sequence)
|
||||
print(tokenized_sequence)
|
||||
>>> tokenized_sequence = tokenizer.tokenize(sequence)
|
||||
|
||||
The tokens are either words or subwords. Here for instance, "VRAM" wasn't in the model vocabulary, so it's been split
|
||||
in "V", "RA" and "M". To indicate those tokens are not separate words but parts of the same word, a double-hash prefix is
|
||||
@@ -63,6 +62,7 @@ added for "RA" and "M":
|
||||
|
||||
::
|
||||
|
||||
>>> print(tokenized_sequence)
|
||||
['A', 'Titan', 'R', '##T', '##X', 'has', '24', '##GB', 'of', 'V', '##RA', '##M']
|
||||
|
||||
These tokens can then be converted into IDs which are understandable by the model. This can be done by directly feeding
|
||||
@@ -71,14 +71,14 @@ the sentence to the tokenizer, which leverages the Rust implementation of
|
||||
|
||||
::
|
||||
|
||||
encoded_sequence = tokenizer(sequence)["input_ids"]
|
||||
print(encoded_sequence)
|
||||
>>> encoded_sequence = tokenizer(sequence)["input_ids"]
|
||||
|
||||
The tokenizer returns a dictionary with all the arguments necessary for its corresponding model to work properly. The
|
||||
token indices are under the key "input_ids":
|
||||
|
||||
::
|
||||
|
||||
>>> print(encoded_sequence)
|
||||
[101, 138, 18696, 155, 1942, 3190, 1144, 1572, 13745, 1104, 159, 9664, 2107, 102]
|
||||
|
||||
Note that the tokenizer automatically adds "special tokens" (if the associated model relies on them) which are special
|
||||
@@ -86,13 +86,14 @@ IDs the model sometimes uses. If we decode the previous sequence of ids,
|
||||
|
||||
::
|
||||
|
||||
tokenizer.decode(encoded_sequence)
|
||||
>>> decoded_sequence = tokenizer.decode(encoded_sequence)
|
||||
|
||||
we will see
|
||||
|
||||
::
|
||||
|
||||
'[CLS] A Titan RTX has 24GB of VRAM [SEP]'
|
||||
>>> print(decoded_sequence)
|
||||
[CLS] A Titan RTX has 24GB of VRAM [SEP]
|
||||
|
||||
because this is the way a :class:`~transformers.BertModel` is going to expect its inputs.
|
||||
|
||||
@@ -108,21 +109,20 @@ For example, consider these two sequences:
|
||||
|
||||
::
|
||||
|
||||
from transformers import BertTokenizer
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
>>> from transformers import BertTokenizer
|
||||
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
|
||||
sequence_a = "This is a short sequence."
|
||||
sequence_b = "This is a rather long sequence. It is at least longer than the sequence A."
|
||||
>>> sequence_a = "This is a short sequence."
|
||||
>>> sequence_b = "This is a rather long sequence. It is at least longer than the sequence A."
|
||||
|
||||
encoded_sequence_a = tokenizer(sequence_a)["input_ids"]
|
||||
encoded_sequence_b = tokenizer(sequence_b)["input_ids"]
|
||||
|
||||
len(encoded_sequence_a), len(encoded_sequence_b)
|
||||
>>> encoded_sequence_a = tokenizer(sequence_a)["input_ids"]
|
||||
>>> encoded_sequence_b = tokenizer(sequence_b)["input_ids"]
|
||||
|
||||
The encoded versions have different lengths:
|
||||
|
||||
::
|
||||
|
||||
>>> len(encoded_sequence_a), len(encoded_sequence_b)
|
||||
(8, 19)
|
||||
|
||||
Therefore, we can't put them together in the same tensor as-is. The first sequence needs to be padded up to the length
|
||||
@@ -133,15 +133,14 @@ it to pad like this:
|
||||
|
||||
::
|
||||
|
||||
padded_sequences = tokenizer([sequence_a, sequence_b], padding=True)
|
||||
padded_sequences["input_ids"]
|
||||
>>> padded_sequences = tokenizer([sequence_a, sequence_b], padding=True)
|
||||
|
||||
We can see that 0s have been added on the right of the first sentence to make it the same length as the second one:
|
||||
|
||||
::
|
||||
|
||||
[[101, 1188, 1110, 170, 1603, 4954, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[101, 1188, 1110, 170, 1897, 1263, 4954, 119, 1135, 1110, 1120, 1655, 2039, 1190, 1103, 4954, 138, 119, 102]]
|
||||
>>> padded_sequences["input_ids"]
|
||||
[[101, 1188, 1110, 170, 1603, 4954, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1188, 1110, 170, 1897, 1263, 4954, 119, 1135, 1110, 1120, 1655, 2039, 1190, 1103, 4954, 138, 119, 102]]
|
||||
|
||||
This can then be converted into a tensor in PyTorch or TensorFlow. The attention mask is a binary tensor indicating
|
||||
the position of the padded indices so that the model does not attend to them. For the
|
||||
@@ -150,14 +149,8 @@ a padded value. This attention mask is in the dictionary returned by the tokeniz
|
||||
|
||||
::
|
||||
|
||||
padded_sequences["attention_mask"]
|
||||
|
||||
will give back
|
||||
|
||||
::
|
||||
|
||||
[[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
|
||||
>>> padded_sequences["attention_mask"]
|
||||
[[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
|
||||
|
||||
.. _token-type-ids:
|
||||
|
||||
@@ -170,26 +163,27 @@ tokens. For example, the BERT model builds its two sequence input as such:
|
||||
|
||||
::
|
||||
|
||||
# [CLS] SEQUENCE_A [SEP] SEQUENCE_B [SEP]
|
||||
>>> # [CLS] SEQUENCE_A [SEP] SEQUENCE_B [SEP]
|
||||
|
||||
We can use our tokenizer to automatically generate such a sentence by passing the two sequences as two arguments (and
|
||||
not a list like before) like this:
|
||||
|
||||
::
|
||||
|
||||
from transformers import BertTokenizer
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
sequence_a = "HuggingFace is based in NYC"
|
||||
sequence_b = "Where is HuggingFace based?"
|
||||
>>> from transformers import BertTokenizer
|
||||
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
>>> sequence_a = "HuggingFace is based in NYC"
|
||||
>>> sequence_b = "Where is HuggingFace based?"
|
||||
|
||||
encoded_dict = tokenizer(sequence_a, sequence_b)
|
||||
tokenizer.decode(encoded_dict["input_ids"])
|
||||
>>> encoded_dict = tokenizer(sequence_a, sequence_b)
|
||||
>>> decoded = tokenizer.decode(encoded_dict["input_ids"])
|
||||
|
||||
which will return:
|
||||
|
||||
::
|
||||
|
||||
"[CLS] HuggingFace is based in NYC [SEP] Where is HuggingFace based? [SEP]"
|
||||
>>> print(decoded)
|
||||
[CLS] HuggingFace is based in NYC [SEP] Where is HuggingFace based? [SEP]
|
||||
|
||||
This is enough for some models to understand where one sequence ends and where another begins. However, other models
|
||||
such as BERT have an additional mechanism, which are the token type IDs (also called segment IDs). They are a binary
|
||||
@@ -199,12 +193,7 @@ The tokenizer returns in the dictionary under the key "token_type_ids":
|
||||
|
||||
::
|
||||
|
||||
encoded_dict['token_type_ids']
|
||||
|
||||
will return
|
||||
|
||||
::
|
||||
|
||||
>>> encoded_dict['token_type_ids']
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
||||
|
||||
The first sequence, the "context" used for the question, has all its tokens represented by :obj:`0`, whereas the
|
||||
|
||||
@@ -36,10 +36,11 @@ Here is an example using the ``xlm-clm-enfr-1024`` checkpoint (Causal language m
|
||||
|
||||
.. code-block::
|
||||
|
||||
import torch
|
||||
from transformers import XLMTokenizer, XLMWithLMHeadModel
|
||||
>>> import torch
|
||||
>>> from transformers import XLMTokenizer, XLMWithLMHeadModel
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained("xlm-clm-1024-enfr")
|
||||
>>> tokenizer = XLMTokenizer.from_pretrained("xlm-clm-enfr-1024")
|
||||
>>> model = XLMWithLMHeadModel.from_pretrained("xlm-clm-enfr-1024")
|
||||
|
||||
|
||||
The different languages this model/tokenizer handles, as well as the ids of these languages are visible using the
|
||||
@@ -47,16 +48,15 @@ The different languages this model/tokenizer handles, as well as the ids of thes
|
||||
|
||||
.. code-block::
|
||||
|
||||
# Continuation of the previous script
|
||||
print(tokenizer.lang2id) # {'en': 0, 'fr': 1}
|
||||
>>> print(tokenizer.lang2id)
|
||||
{'en': 0, 'fr': 1}
|
||||
|
||||
|
||||
These ids should be used when passing a language parameter during a model pass. Let's define our inputs:
|
||||
|
||||
.. code-block::
|
||||
|
||||
# Continuation of the previous script
|
||||
input_ids = torch.tensor([tokenizer.encode("Wikipedia was used to")]) # batch size of 1
|
||||
>>> input_ids = torch.tensor([tokenizer.encode("Wikipedia was used to")]) # batch size of 1
|
||||
|
||||
|
||||
We should now define the language embedding by using the previously defined language id. We want to create a tensor
|
||||
@@ -64,20 +64,18 @@ filled with the appropriate language ids, of the same size as input_ids. For eng
|
||||
|
||||
.. code-block::
|
||||
|
||||
# Continuation of the previous script
|
||||
language_id = tokenizer.lang2id['en'] # 0
|
||||
langs = torch.tensor([language_id] * input_ids.shape[1]) # torch.tensor([0, 0, 0, ..., 0])
|
||||
>>> language_id = tokenizer.lang2id['en'] # 0
|
||||
>>> langs = torch.tensor([language_id] * input_ids.shape[1]) # torch.tensor([0, 0, 0, ..., 0])
|
||||
|
||||
# We reshape it to be of size (batch_size, sequence_length)
|
||||
langs = langs.view(1, -1) # is now of shape [1, sequence_length] (we have a batch size of 1)
|
||||
>>> # We reshape it to be of size (batch_size, sequence_length)
|
||||
>>> langs = langs.view(1, -1) # is now of shape [1, sequence_length] (we have a batch size of 1)
|
||||
|
||||
|
||||
You can then feed it all as input to your model:
|
||||
|
||||
.. code-block::
|
||||
|
||||
# Continuation of the previous script
|
||||
outputs = model(input_ids, langs=langs)
|
||||
>>> outputs = model(input_ids, langs=langs)
|
||||
|
||||
|
||||
The example `run_generation.py <https://github.com/huggingface/transformers/blob/master/examples/text-generation/run_generation.py>`__
|
||||
|
||||
@@ -32,40 +32,33 @@ provides the following tasks out of the box:
|
||||
Let's see how this works for sentiment analysis (the other tasks are all covered in the
|
||||
:doc:`task summary </task_summary>`):
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
from transformers import pipeline
|
||||
classifier = pipeline('sentiment-analysis')
|
||||
>>> from transformers import pipeline
|
||||
>>> classifier = pipeline('sentiment-analysis')
|
||||
|
||||
When typing this command for the first time, a pretrained model and its tokenizer are downloaded and cached. We will
|
||||
look at both later on, but as an introduction the tokenizer's job is to preprocess the text for the model, which is
|
||||
then responsible for making predictions. The pipeline groups all of that together, and post-processes the predictions to
|
||||
make them readable. For instance
|
||||
make them readable. For instance:
|
||||
|
||||
::
|
||||
|
||||
classifier('We are very happy to show you the 🤗 Transformers library.')
|
||||
|
||||
will return something like this:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
>>> classifier('We are very happy to show you the 🤗 Transformers library.')
|
||||
[{'label': 'POSITIVE', 'score': 0.9997795224189758}]
|
||||
|
||||
That's encouraging! You can use it on a list of sentences, which will be preprocessed then fed to the model as a
|
||||
`batch`:
|
||||
`batch`, returning a list of dictionaries like this one:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
classifier(["We are very happy to show you the 🤗 Transformers library.",
|
||||
"We hope you don't hate it."])
|
||||
|
||||
returning a list of dictionaries like this one:
|
||||
|
||||
::
|
||||
|
||||
[{'label': 'POSITIVE', 'score': 0.9997795224189758},
|
||||
{'label': 'NEGATIVE', 'score': 0.5308589935302734}]
|
||||
>>> results = classifier(["We are very happy to show you the 🤗 Transformers library.",
|
||||
... "We hope you don't hate it."])
|
||||
>>> for result in results:
|
||||
... print(f"label: {result['label']}, with score: {round(result['score'], 4)}")
|
||||
label: POSITIVE, with score: 0.9998
|
||||
label: NEGATIVE, with score: 0.5309
|
||||
|
||||
You can see the second sentence has been classified as negative (it needs to be positive or negative) but its score is
|
||||
fairly neutral.
|
||||
@@ -83,9 +76,9 @@ see how we can use it.
|
||||
|
||||
You can directly pass the name of the model to use to :func:`~transformers.pipeline`:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
classifier = pipeline('sentiment-analysis', model="nlptown/bert-base-multilingual-uncased-sentiment")
|
||||
>>> classifier = pipeline('sentiment-analysis', model="nlptown/bert-base-multilingual-uncased-sentiment")
|
||||
|
||||
This classifier can now deal with texts in English, French, but also Dutch, German, Italian and Spanish! You can also
|
||||
replace that name by a local folder where you have saved a pretrained model (see below). You can also pass a model
|
||||
@@ -98,29 +91,30 @@ tokenizer associated to the model we picked and instantiate it. The second is
|
||||
the model itself. Note that if we were using the library on another task, the class of the model would change. The
|
||||
:doc:`task summary </task_summary>` tutorial summarizes which class is used for which task.
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||
## TENSORFLOW CODE
|
||||
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
|
||||
>>> ## PYTORCH CODE
|
||||
>>> from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
|
||||
|
||||
Now, to download the models and tokenizer we found previously, we just have to use the
|
||||
:func:`~transformers.AutoModelForSequenceClassification.from_pretrained` method (feel free to replace ``model_name`` by
|
||||
any other model from the model hub):
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
|
||||
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
pipe = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
|
||||
## TENSORFLOW CODE
|
||||
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
|
||||
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
|
||||
>>> model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
>>> pipe = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
|
||||
>>> # This model only exists in PyTorch, so we use the `from_pt` flag to import that model in TensorFlow.
|
||||
>>> model = TFAutoModelForSequenceClassification.from_pretrained(model_name, from_pt=True)
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
>>> classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
|
||||
|
||||
If you don't find a model that has been pretrained on some data similar to yours, you will need to fine-tune a
|
||||
pretrained model on your data. We provide :doc:`example scripts </examples>` to do so. Once you're done, don't forget
|
||||
@@ -136,16 +130,16 @@ using the :obj:`from_pretrained` method:
|
||||
|
||||
::
|
||||
|
||||
## PYTORCH CODE
|
||||
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
## TENSORFLOW CODE
|
||||
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
|
||||
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||
>>> model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
>>> pt_model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
|
||||
>>> model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
|
||||
Using the tokenizer
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
@@ -161,48 +155,56 @@ the model. To do this, the tokenizer has a `vocab`, which is the part we downloa
|
||||
|
||||
To apply these steps on a given text, we can just feed it to our tokenizer:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
input = tokenizer("We are very happy to show you the 🤗 Transformers library.")
|
||||
print(input)
|
||||
>>> inputs = tokenizer("We are very happy to show you the 🤗 Transformers library.")
|
||||
|
||||
This returns a dictionary mapping strings to lists of ints. It contains the `ids of the tokens <glossary.html#input-ids>`__,
|
||||
as mentioned before, but also additional arguments that will be useful to the model. Here for instance, we also have an
|
||||
`attention mask <glossary.html#attention-mask>`__ that the model will use to have a better understanding of the sequence:
|
||||
|
||||
|
||||
::
|
||||
{'input_ids': [101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102],
|
||||
'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
|
||||
.. code-block::
|
||||
|
||||
>>> print(inputs)
|
||||
{'input_ids': [101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
|
||||
|
||||
You can pass a list of sentences directly to your tokenizer. If your goal is to send them through your model as a
|
||||
batch, you probably want to pad them all to the same length, truncate them to the maximum length the model can accept
|
||||
and get tensors back. You can specify all of that to the tokenizer:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
batch = tokenizer(
|
||||
["We are very happy to show you the 🤗 Transformers library.",
|
||||
"We hope you don't hate it."],
|
||||
padding=True, truncation=True, return_tensors="pt")
|
||||
print(batch)
|
||||
## TENSORFLOW CODE
|
||||
batch = tokenizer(
|
||||
["We are very happy to show you the 🤗 Transformers library.",
|
||||
"We hope you don't hate it."],
|
||||
padding=True, truncation=True, return_tensors="tf")
|
||||
print(batch)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> pt_batch = tokenizer(
|
||||
... ["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."],
|
||||
... padding=True,
|
||||
... truncation=True,
|
||||
... return_tensors="pt"
|
||||
... )
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> tf_batch = tokenizer(
|
||||
... ["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."],
|
||||
... padding=True,
|
||||
... truncation=True,
|
||||
... return_tensors="tf"
|
||||
... )
|
||||
|
||||
The padding is automatically applied on the side the model expects it (in this case, on the right), with the
|
||||
padding token the model was pretrained with. The attention mask is also adapted to take the padding into account:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
{'input_ids': tensor([[ 101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102],
|
||||
[ 101, 2057, 3246, 2017, 2123, 1005, 1056, 5223, 2009, 1012, 102, 0, 0, 0]]),
|
||||
'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
||||
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]])}
|
||||
>>> ## PYTORCH CODE
|
||||
>>> for key, value in pt_batch.items():
|
||||
... print(f"{key}: {value.numpy().tolist()}")
|
||||
input_ids: [[101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102], [101, 2057, 3246, 2017, 2123, 1005, 1056, 5223, 2009, 1012, 102, 0, 0, 0]]
|
||||
attention_mask: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]]
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> for key, value in tf_batch.items():
|
||||
... print(f"{key}: {value.numpy().tolist()}")
|
||||
input_ids: [[101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102], [101, 2057, 3246, 2017, 2123, 1005, 1056, 5223, 2009, 1012, 102, 0, 0, 0]]
|
||||
attention_mask: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]]
|
||||
|
||||
You can learn more about tokenizers :doc:`here <preprocessing>`.
|
||||
|
||||
@@ -213,20 +215,27 @@ Once your input has been preprocessed by the tokenizer, you can directly send it
|
||||
contain all the relevant information the model needs. If you're using a TensorFlow model, you can directly pass the
|
||||
dictionary keys to tensors; for a PyTorch model, you need to unpack the dictionary by adding :obj:`**`.
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
outputs = model(**batch)
|
||||
## TENSORFLOW CODE
|
||||
outputs = model(batch)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> pt_outputs = pt_model(**pt_batch)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> tf_outputs = tf_model(tf_batch)
|
||||
|
||||
In 🤗 Transformers, all outputs are tuples (with only one element potentially). Here, we get a tuple with just the
|
||||
final activations of the model.
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
(tensor([[-4.1329, 4.3811],
|
||||
[ 0.0818, -0.0418]]),)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> print(pt_outputs)
|
||||
(tensor([[-4.0833, 4.3364],
|
||||
[ 0.0818, -0.0418]], grad_fn=<AddmmBackward>),)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> print(tf_outputs)
|
||||
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
|
||||
array([[-4.0832963 , 4.3364134 ],
|
||||
[ 0.08181238, -0.04178794]], dtype=float32)>,)
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -235,33 +244,39 @@ final activations of the model.
|
||||
|
||||
Let's apply the SoftMax activation to get predictions.
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
import torch.nn.functional as F
|
||||
predictions = F.softmax(outputs[0], dim=-1)
|
||||
print(predictions)
|
||||
## TENSORFLOW CODE
|
||||
predictions = tf.nn.softmax(outputs[0], axis=-1)
|
||||
print(predictions)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> import torch.nn.functional as F
|
||||
>>> pt_predictions = F.softmax(pt_outputs[0], dim=-1)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> import tensorflow as tf
|
||||
>>> tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
|
||||
|
||||
We can see we get the numbers from before:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
tensor([[2.0060e-04, 9.9980e-01],
|
||||
[5.3086e-01, 4.6914e-01]])
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> print(tf_predictions)
|
||||
tf.Tensor(
|
||||
[[2.2042994e-04 9.9977952e-01]
|
||||
[5.3086078e-01 4.6913919e-01]], shape=(2, 2), dtype=float32)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> print(pt_predictions)
|
||||
tensor([[2.2043e-04, 9.9978e-01],
|
||||
[5.3086e-01, 4.6914e-01]], grad_fn=<SoftmaxBackward>)
|
||||
|
||||
If you have labels, you can provide them to the model; it will return a tuple with the loss and the final activations.
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
import torch
|
||||
outputs = model(**batch, labels = torch.tensor([1, 0])
|
||||
## TENSORFLOW CODE
|
||||
import tensorflow as tf
|
||||
outputs = model(batch, labels = tf.constant([1, 0])
|
||||
>>> ## PYTORCH CODE
|
||||
>>> import torch
|
||||
>>> pt_outputs = pt_model(**pt_batch, labels = torch.tensor([1, 0]))
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> import tensorflow as tf
|
||||
>>> tf_outputs = tf_model(tf_batch, labels = tf.constant([1, 0]))
|
||||
|
||||
Models are standard `torch.nn.Module <https://pytorch.org/docs/stable/nn.html#torch.nn.Module>`__ or
|
||||
`tf.keras.Model <https://www.tensorflow.org/api_docs/python/tf/keras/Model>`__ so you can use them in your usual
|
||||
@@ -298,12 +313,12 @@ Lastly, you can also ask the model to return all hidden states and all attention
|
||||
|
||||
::
|
||||
|
||||
## PYTORCH CODE
|
||||
outputs = model(**batch, output_hidden_states=True, output_attentions=True)
|
||||
all_hidden_states, all_attentions = outputs[-2:]
|
||||
## TENSORFLOW CODE
|
||||
outputs = model(batch, output_hidden_states=True, output_attentions=True)
|
||||
all_hidden_states, all_attentions = outputs[-2:]
|
||||
>>> ## PYTORCH CODE
|
||||
>>> pt_outputs = pt_model(**pt_batch, output_hidden_states=True, output_attentions=True)
|
||||
>>> all_hidden_states, all_attentions = pt_outputs[-2:]
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> tf_outputs = tf_model(tf_batch, output_hidden_states=True, output_attentions=True)
|
||||
>>> all_hidden_states, all_attentions = tf_outputs[-2:]
|
||||
|
||||
Accessing the code
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
@@ -318,18 +333,18 @@ using the :doc:`DistilBERT </model_doc/distilbert>` architecture. The model auto
|
||||
to that specific model, or browse the source code. This is how you would directly instantiate model and tokenizer
|
||||
without the auto magic:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
|
||||
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
model = DistilBertForSequenceClassification.from_pretrained(model_name)
|
||||
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
|
||||
## TENSORFLOW CODE
|
||||
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
|
||||
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
model = TFDistilBertForSequenceClassification.from_pretrained(model_name)
|
||||
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
|
||||
>>> model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
>>> model = DistilBertForSequenceClassification.from_pretrained(model_name)
|
||||
>>> tokenizer = DistilBertTokenizer.from_pretrained(model_name)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
|
||||
>>> model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
>>> model = TFDistilBertForSequenceClassification.from_pretrained(model_name)
|
||||
>>> tokenizer = DistilBertTokenizer.from_pretrained(model_name)
|
||||
|
||||
Customizing the model
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
@@ -345,18 +360,18 @@ Here we use the predefined vocabulary of DistilBERT (hence load the tokenizer wi
|
||||
instantiate the model from the configuration instead of using the
|
||||
:func:`~transformers.DistilBertForSequenceClassification.from_pretrained` method).
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
from transformers import DistilBertConfig, DistilBertTokenizer, DistilBertForSequenceClassification
|
||||
config = DistilBertConfig(n_heads=8, dim=512, hidden_dim=4*512)
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
|
||||
model = DistilBertForSequenceClassification(config)
|
||||
## TENSORFLOW CODE
|
||||
from transformers import DistilBertConfig, DistilBertTokenizer, TFDistilBertForSequenceClassification
|
||||
config = DistilBertConfig(n_heads=8, dim=512, hidden_dim=4*512)
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
|
||||
model = TFDistilBertForSequenceClassification(config)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> from transformers import DistilBertConfig, DistilBertTokenizer, DistilBertForSequenceClassification
|
||||
>>> config = DistilBertConfig(n_heads=8, dim=512, hidden_dim=4*512)
|
||||
>>> tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
|
||||
>>> model = DistilBertForSequenceClassification(config)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> from transformers import DistilBertConfig, DistilBertTokenizer, TFDistilBertForSequenceClassification
|
||||
>>> config = DistilBertConfig(n_heads=8, dim=512, hidden_dim=4*512)
|
||||
>>> tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
|
||||
>>> model = TFDistilBertForSequenceClassification(config)
|
||||
|
||||
For something that only changes the head of the model (for instance, the number of labels), you can still use a
|
||||
pretrained model for the body. For instance, let's define a classifier for 10 different labels using a pretrained body.
|
||||
@@ -364,15 +379,15 @@ We could create a configuration with all the default values and just change the
|
||||
can directly pass any argument a configuration would take to the :func:`from_pretrained` method and it will update the
|
||||
default configuration with it:
|
||||
|
||||
::
|
||||
.. code-block::
|
||||
|
||||
## PYTORCH CODE
|
||||
from transformers import DistilBertConfig, DistilBertTokenizer, DistilBertForSequenceClassification
|
||||
model_name = "distilbert-base-uncased"
|
||||
model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=10)
|
||||
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
|
||||
## TENSORFLOW CODE
|
||||
from transformers import DistilBertConfig, DistilBertTokenizer, TFDistilBertForSequenceClassification
|
||||
model_name = "distilbert-base-uncased"
|
||||
model = TFDistilBertForSequenceClassification.from_pretrained(model_name, num_labels=10)
|
||||
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> from transformers import DistilBertConfig, DistilBertTokenizer, DistilBertForSequenceClassification
|
||||
>>> model_name = "distilbert-base-uncased"
|
||||
>>> model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=10)
|
||||
>>> tokenizer = DistilBertTokenizer.from_pretrained(model_name)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> from transformers import DistilBertConfig, DistilBertTokenizer, TFDistilBertForSequenceClassification
|
||||
>>> model_name = "distilbert-base-uncased"
|
||||
>>> model = TFDistilBertForSequenceClassification.from_pretrained(model_name, num_labels=10)
|
||||
>>> tokenizer = DistilBertTokenizer.from_pretrained(model_name)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
2
setup.py
2
setup.py
@@ -86,7 +86,7 @@ extras["all"] = extras["serving"] + ["tensorflow", "torch"]
|
||||
|
||||
extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "psutil"]
|
||||
# sphinx-rtd-theme==0.5.0 introduced big changes in the style.
|
||||
extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rtd-theme==0.4.3"]
|
||||
extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rtd-theme==0.4.3", "sphinx-copybutton"]
|
||||
extras["quality"] = [
|
||||
"black",
|
||||
"isort @ git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort",
|
||||
|
||||
@@ -81,22 +81,22 @@ class AlbertConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import AlbertConfig, AlbertModel
|
||||
# Initializing an ALBERT-xxlarge style configuration
|
||||
albert_xxlarge_configuration = AlbertConfig()
|
||||
>>> from transformers import AlbertConfig, AlbertModel
|
||||
>>> # Initializing an ALBERT-xxlarge style configuration
|
||||
>>> albert_xxlarge_configuration = AlbertConfig()
|
||||
|
||||
# Initializing an ALBERT-base style configuration
|
||||
albert_base_configuration = AlbertConfig(
|
||||
hidden_size=768,
|
||||
num_attention_heads=12,
|
||||
intermediate_size=3072,
|
||||
)
|
||||
>>> # Initializing an ALBERT-base style configuration
|
||||
>>> albert_base_configuration = AlbertConfig(
|
||||
... hidden_size=768,
|
||||
... num_attention_heads=12,
|
||||
... intermediate_size=3072,
|
||||
... )
|
||||
|
||||
# Initializing a model from the ALBERT-base style configuration
|
||||
model = AlbertModel(albert_xxlarge_configuration)
|
||||
>>> # Initializing a model from the ALBERT-base style configuration
|
||||
>>> model = AlbertModel(albert_base_configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
|
||||
model_type = "albert"
|
||||
|
||||
@@ -73,9 +73,13 @@ class BartConfig(PretrainedConfig):
|
||||
):
|
||||
r"""
|
||||
:class:`~transformers.BartConfig` is the configuration class for `BartModel`.
|
||||
Examples:
|
||||
config = BartConfig.from_pretrained('bart-large')
|
||||
model = BartModel(config)
|
||||
|
||||
Examples::
|
||||
|
||||
>>> from transformers import BartConfig, BartModel
|
||||
|
||||
>>> config = BartConfig.from_pretrained('facebook/bart-large')
|
||||
>>> model = BartModel(config)
|
||||
"""
|
||||
if "hidden_size" in common_kwargs:
|
||||
raise ValueError("hidden size is called d_model")
|
||||
|
||||
@@ -95,16 +95,16 @@ class BertConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import BertModel, BertConfig
|
||||
>>> from transformers import BertModel, BertConfig
|
||||
|
||||
# Initializing a BERT bert-base-uncased style configuration
|
||||
configuration = BertConfig()
|
||||
>>> # Initializing a BERT bert-base-uncased style configuration
|
||||
>>> configuration = BertConfig()
|
||||
|
||||
# Initializing a model from the bert-base-uncased style configuration
|
||||
model = BertModel(configuration)
|
||||
>>> # Initializing a model from the bert-base-uncased style configuration
|
||||
>>> model = BertModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
model_type = "bert"
|
||||
|
||||
|
||||
@@ -66,16 +66,16 @@ class CTRLConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import CTRLModel, CTRLConfig
|
||||
>>> from transformers import CTRLModel, CTRLConfig
|
||||
|
||||
# Initializing a CTRL configuration
|
||||
configuration = CTRLConfig()
|
||||
>>> # Initializing a CTRL configuration
|
||||
>>> configuration = CTRLConfig()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = CTRLModel(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = CTRLModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
|
||||
model_type = "ctrl"
|
||||
|
||||
@@ -80,16 +80,16 @@ class DistilBertConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import DistilBertModel, DistilBertConfig
|
||||
>>> from transformers import DistilBertModel, DistilBertConfig
|
||||
|
||||
# Initializing a DistilBERT configuration
|
||||
configuration = DistilBertConfig()
|
||||
>>> # Initializing a DistilBERT configuration
|
||||
>>> configuration = DistilBertConfig()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = DistilBertModel(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = DistilBertModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
model_type = "distilbert"
|
||||
|
||||
|
||||
@@ -101,16 +101,16 @@ class ElectraConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import ElectraModel, ElectraConfig
|
||||
>>> from transformers import ElectraModel, ElectraConfig
|
||||
|
||||
# Initializing a ELECTRA electra-base-uncased style configuration
|
||||
configuration = ElectraConfig()
|
||||
>>> # Initializing an ELECTRA electra-base-uncased style configuration
|
||||
>>> configuration = ElectraConfig()
|
||||
|
||||
# Initializing a model from the electra-base-uncased style configuration
|
||||
model = ElectraModel(configuration)
|
||||
>>> # Initializing a model from the electra-base-uncased style configuration
|
||||
>>> model = ElectraModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
model_type = "electra"
|
||||
|
||||
|
||||
@@ -42,20 +42,20 @@ class EncoderDecoderConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import BertConfig, EncoderDecoderConfig, EncoderDecoderModel
|
||||
>>> from transformers import BertConfig, EncoderDecoderConfig, EncoderDecoderModel
|
||||
|
||||
# Initializing a BERT bert-base-uncased style configuration
|
||||
config_encoder = BertConfig()
|
||||
config_decoder = BertConfig()
|
||||
>>> # Initializing a BERT bert-base-uncased style configuration
|
||||
>>> config_encoder = BertConfig()
|
||||
>>> config_decoder = BertConfig()
|
||||
|
||||
config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder)
|
||||
>>> config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder)
|
||||
|
||||
# Initializing a Bert2Bert model from the bert-base-uncased style configurations
|
||||
model = EncoderDecoderModel(config=config)
|
||||
>>> # Initializing a Bert2Bert model from the bert-base-uncased style configurations
|
||||
>>> model = EncoderDecoderModel(config=config)
|
||||
|
||||
# Accessing the model configuration
|
||||
config_encoder = model.config.encoder
|
||||
config_decoder = model.config.decoder
|
||||
>>> # Accessing the model configuration
|
||||
>>> config_encoder = model.config.encoder
|
||||
>>> config_decoder = model.config.decoder
|
||||
"""
|
||||
model_type = "encoder_decoder"
|
||||
|
||||
|
||||
@@ -100,16 +100,16 @@ class GPT2Config(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import GPT2Model, GPT2Config
|
||||
>>> from transformers import GPT2Model, GPT2Config
|
||||
|
||||
# Initializing a GPT2 configuration
|
||||
configuration = GPT2Config()
|
||||
>>> # Initializing a GPT2 configuration
|
||||
>>> configuration = GPT2Config()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = GPT2Model(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = GPT2Model(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
|
||||
model_type = "gpt2"
|
||||
|
||||
@@ -49,16 +49,16 @@ class LongformerConfig(RobertaConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import LongformerConfig, LongformerModel
|
||||
>>> from transformers import LongformerConfig, LongformerModel
|
||||
|
||||
# Initializing a Longformer configuration
|
||||
configuration = LongformerConfig()
|
||||
>>> # Initializing a Longformer configuration
|
||||
>>> configuration = LongformerConfig()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = LongformerModel(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = LongformerModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
model_type = "longformer"
|
||||
|
||||
|
||||
@@ -85,16 +85,16 @@ class MobileBertConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import MobileBertModel, MobileBertConfig
|
||||
>>> from transformers import MobileBertModel, MobileBertConfig
|
||||
|
||||
# Initializing a MobileBERT configuration
|
||||
configuration = MobileBertConfig()
|
||||
>>> # Initializing a MobileBERT configuration
|
||||
>>> configuration = MobileBertConfig()
|
||||
|
||||
# Initializing a model from the configuration above
|
||||
model = MobileBertModel(configuration)
|
||||
>>> # Initializing a model from the configuration above
|
||||
>>> model = MobileBertModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
|
||||
Attributes:
|
||||
pretrained_config_archive_map (Dict[str, str]):
|
||||
|
||||
@@ -98,16 +98,16 @@ class OpenAIGPTConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import OpenAIGPTConfig, OpenAIGPTModel
|
||||
>>> from transformers import OpenAIGPTConfig, OpenAIGPTModel
|
||||
|
||||
# Initializing a GPT configuration
|
||||
configuration = OpenAIGPTConfig()
|
||||
>>> # Initializing a GPT configuration
|
||||
>>> configuration = OpenAIGPTConfig()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = OpenAIGPTModel(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = OpenAIGPTModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
|
||||
model_type = "openai-gpt"
|
||||
|
||||
@@ -125,16 +125,16 @@ class ReformerConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import ReformerModel, ReformerConfig
|
||||
>>> from transformers import ReformerModel, ReformerConfig
|
||||
|
||||
# Initializing a Reformer configuration
|
||||
configuration = ReformerConfig()
|
||||
>>> # Initializing a Reformer configuration
|
||||
>>> configuration = ReformerConfig()
|
||||
|
||||
# Initializing a Reformer model
|
||||
model = ReformerModel(configuration)
|
||||
>>> # Initializing a Reformer model
|
||||
>>> model = ReformerModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
model_type = "reformer"
|
||||
|
||||
|
||||
@@ -49,16 +49,16 @@ class RobertaConfig(BertConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import RobertaConfig, RobertaModel
|
||||
>>> from transformers import RobertaConfig, RobertaModel
|
||||
|
||||
# Initializing a RoBERTa configuration
|
||||
configuration = RobertaConfig()
|
||||
>>> # Initializing a RoBERTa configuration
|
||||
>>> configuration = RobertaConfig()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = RobertaModel(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = RobertaModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
model_type = "roberta"
|
||||
|
||||
|
||||
@@ -100,16 +100,16 @@ class TransfoXLConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import TransfoXLConfig, TransfoXLModel
|
||||
>>> from transformers import TransfoXLConfig, TransfoXLModel
|
||||
|
||||
# Initializing a Transformer XL configuration
|
||||
configuration = TransfoXLConfig()
|
||||
>>> # Initializing a Transformer XL configuration
|
||||
>>> configuration = TransfoXLConfig()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = TransfoXLModel(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = TransfoXLModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
|
||||
model_type = "transfo-xl"
|
||||
|
||||
@@ -142,16 +142,16 @@ class XLMConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import XLMConfig, XLMModel
|
||||
>>> from transformers import XLMConfig, XLMModel
|
||||
|
||||
# Initializing a XLM configuration
|
||||
configuration = XLMConfig()
|
||||
>>> # Initializing an XLM configuration
|
||||
>>> configuration = XLMConfig()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = XLMModel(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = XLMModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
|
||||
model_type = "xlm"
|
||||
|
||||
@@ -113,16 +113,16 @@ class XLNetConfig(PretrainedConfig):
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import XLNetConfig, XLNetModel
|
||||
>>> from transformers import XLNetConfig, XLNetModel
|
||||
|
||||
# Initializing a XLNet configuration
|
||||
configuration = XLNetConfig()
|
||||
>>> # Initializing an XLNet configuration
|
||||
>>> configuration = XLNetConfig()
|
||||
|
||||
# Initializing a model from the configuration
|
||||
model = XLNetModel(configuration)
|
||||
>>> # Initializing a model from the configuration
|
||||
>>> model = XLNetModel(configuration)
|
||||
|
||||
# Accessing the model configuration
|
||||
configuration = model.config
|
||||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
"""
|
||||
|
||||
model_type = "xlnet"
|
||||
|
||||
@@ -488,11 +488,11 @@ class SquadProcessor(DataProcessor):
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow_datasets as tfds
|
||||
dataset = tfds.load("squad")
|
||||
>>> import tensorflow_datasets as tfds
|
||||
>>> dataset = tfds.load("squad")
|
||||
|
||||
training_examples = get_examples_from_dataset(dataset, evaluate=False)
|
||||
evaluation_examples = get_examples_from_dataset(dataset, evaluate=True)
|
||||
>>> training_examples = get_examples_from_dataset(dataset, evaluate=False)
|
||||
>>> evaluation_examples = get_examples_from_dataset(dataset, evaluate=True)
|
||||
"""
|
||||
|
||||
if evaluate:
|
||||
|
||||
@@ -186,6 +186,263 @@ def add_end_docstrings(*docstr):
|
||||
return docstring_decorator
|
||||
|
||||
|
||||
PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import torch
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1
|
||||
|
||||
>>> outputs = model(**inputs, labels=labels)
|
||||
>>> loss, scores = outputs[:2]
|
||||
"""
|
||||
|
||||
PT_QUESTION_ANSWERING_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import torch
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> start_positions = torch.tensor([1])
|
||||
>>> end_positions = torch.tensor([3])
|
||||
|
||||
>>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
|
||||
>>> loss, start_scores, end_scores = outputs[:3]
|
||||
"""
|
||||
|
||||
PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import torch
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
>>> outputs = model(**inputs, labels=labels)
|
||||
>>> loss, logits = outputs[:2]
|
||||
"""
|
||||
|
||||
PT_MASKED_LM_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import torch
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"]
|
||||
|
||||
>>> outputs = model(input_ids, labels=input_ids)
|
||||
>>> loss, prediction_scores = outputs[:2]
|
||||
"""
|
||||
|
||||
PT_BASE_MODEL_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import torch
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
|
||||
PT_MULTIPLE_CHOICE_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import torch
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
>>> choice0 = "It is eaten with a fork and a knife."
|
||||
>>> choice1 = "It is eaten while held in the hand."
|
||||
>>> labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
|
||||
|
||||
>>> encoding = tokenizer([[prompt, prompt], [choice0, choice1]], return_tensors='pt', pad_to_max_length=True)
|
||||
>>> outputs = model(**{{k: v.unsqueeze(0) for k,v in encoding.items()}}, labels=labels) # batch size is 1
|
||||
|
||||
>>> # the linear classifier still needs to be trained
|
||||
>>> loss, logits = outputs[:2]
|
||||
"""
|
||||
|
||||
PT_CAUSAL_LM_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> import torch
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> outputs = model(**inputs, labels=inputs["input_ids"])
|
||||
>>> loss, logits = outputs[:2]
|
||||
"""
|
||||
|
||||
TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import tensorflow as tf
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
|
||||
>>> input_ids = inputs["input_ids"]
|
||||
>>> inputs["labels"] = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
|
||||
>>> outputs = model(inputs)
|
||||
>>> loss, scores = outputs[:2]
|
||||
"""
|
||||
|
||||
TF_QUESTION_ANSWERING_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import tensorflow as tf
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
>>> input_dict = tokenizer(question, text, return_tensors='tf')
|
||||
>>> start_scores, end_scores = model(input_dict)
|
||||
|
||||
>>> all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
>>> answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
"""
|
||||
|
||||
TF_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import tensorflow as tf
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
|
||||
>>> inputs["labels"] = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
|
||||
|
||||
>>> outputs = model(inputs)
|
||||
>>> loss, logits = outputs[:2]
|
||||
"""
|
||||
|
||||
# Docstring template used by `add_code_sample_docstrings` for TensorFlow classes whose
# name contains "MaskedLM". The `{tokenizer_class}`, `{model_class}` and `{checkpoint}`
# placeholders are filled in with `str.format`.
# NOTE: the blank line after `Example::` is required; without it reStructuredText does
# not treat the indented code as a literal block.
TF_MASKED_LM_SAMPLE = r"""
    Example::

        >>> from transformers import {tokenizer_class}, {model_class}
        >>> import tensorflow as tf

        >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
        >>> model = {model_class}.from_pretrained('{checkpoint}')

        >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]  # Batch size 1

        >>> outputs = model(input_ids)
        >>> prediction_scores = outputs[0]
"""
|
||||
|
||||
TF_BASE_MODEL_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import tensorflow as tf
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
|
||||
>>> outputs = model(inputs)
|
||||
|
||||
>>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
|
||||
TF_MULTIPLE_CHOICE_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import tensorflow as tf
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
>>> choice0 = "It is eaten with a fork and a knife."
|
||||
>>> choice1 = "It is eaten while held in the hand."
|
||||
|
||||
>>> encoding = tokenizer([[prompt, prompt], [choice0, choice1]], return_tensors='tf', pad_to_max_length=True)
|
||||
>>> inputs = {{k: tf.expand_dims(v, 0) for k, v in encoding.items()}}
|
||||
>>> outputs = model(inputs) # batch size is 1
|
||||
|
||||
>>> # the linear classifier still needs to be trained
|
||||
>>> logits = outputs[0]
|
||||
"""
|
||||
|
||||
TF_CAUSAL_LM_SAMPLE = r"""
|
||||
Example::
|
||||
|
||||
>>> from transformers import {tokenizer_class}, {model_class}
|
||||
>>> import tensorflow as tf
|
||||
|
||||
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
|
||||
>>> model = {model_class}.from_pretrained('{checkpoint}')
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
|
||||
>>> outputs = model(inputs)
|
||||
>>> logits = outputs[0]
|
||||
"""
|
||||
|
||||
|
||||
def add_code_sample_docstrings(*docstr, tokenizer_class=None, checkpoint=None):
    """
    Build a decorator that appends ``docstr`` and a usage example to a function's docstring.

    The example template is chosen from the name of the class that owns the decorated
    function (the first component of ``fn.__qualname__``): a ``TF`` prefix selects the
    TensorFlow templates, and the task is picked by substring match on the class name.

    Args:
        *docstr: Extra strings concatenated and inserted before the code sample.
        tokenizer_class: Value substituted for ``{tokenizer_class}`` in the template.
        checkpoint: Value substituted for ``{checkpoint}`` in the template.

    Raises:
        ValueError: At decoration time, if the class name matches none of the known tasks.
    """

    def docstring_decorator(fn):
        # "BertModel.forward" -> "BertModel"
        model_class = fn.__qualname__.split(".")[0]
        use_tf = model_class.startswith("TF")

        # Checked in order: more specific markers come first, because a name such as
        # "GPT2LMHeadModel" also contains the generic "Model" marker.
        templates = (
            ("SequenceClassification", PT_SEQUENCE_CLASSIFICATION_SAMPLE, TF_SEQUENCE_CLASSIFICATION_SAMPLE),
            ("QuestionAnswering", PT_QUESTION_ANSWERING_SAMPLE, TF_QUESTION_ANSWERING_SAMPLE),
            ("TokenClassification", PT_TOKEN_CLASSIFICATION_SAMPLE, TF_TOKEN_CLASSIFICATION_SAMPLE),
            ("MultipleChoice", PT_MULTIPLE_CHOICE_SAMPLE, TF_MULTIPLE_CHOICE_SAMPLE),
            ("MaskedLM", PT_MASKED_LM_SAMPLE, TF_MASKED_LM_SAMPLE),
            ("LMHead", PT_CAUSAL_LM_SAMPLE, TF_CAUSAL_LM_SAMPLE),
            ("Model", PT_BASE_MODEL_SAMPLE, TF_BASE_MODEL_SAMPLE),
        )
        for marker, pt_sample, tf_sample in templates:
            if marker in model_class:
                code_sample = tf_sample if use_tf else pt_sample
                break
        else:
            raise ValueError(f"Docstring can't be built for model {model_class}")

        built_doc = code_sample.format(
            model_class=model_class, tokenizer_class=tokenizer_class, checkpoint=checkpoint
        )
        existing_doc = fn.__doc__ or ""
        fn.__doc__ = existing_doc + "".join(docstr) + built_doc
        return fn

    return docstring_decorator
|
||||
|
||||
|
||||
def is_remote_url(url_or_filename):
    """Return True when ``url_or_filename`` parses with an ``http`` or ``https`` scheme."""
    scheme = urlparse(url_or_filename).scheme
    return scheme == "http" or scheme == "https"
|
||||
|
||||
@@ -24,13 +24,15 @@ import torch.nn as nn
|
||||
from torch.nn import CrossEntropyLoss, MSELoss
|
||||
|
||||
from .configuration_albert import AlbertConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_bert import ACT2FN, BertEmbeddings, BertSelfAttention, prune_linear_layer
|
||||
from .modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "AlbertTokenizer"
|
||||
|
||||
|
||||
ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"albert-base-v1",
|
||||
@@ -485,6 +487,7 @@ class AlbertModel(AlbertPreTrainedModel):
|
||||
self.encoder.albert_layer_groups[group_idx].albert_layers[inner_group_idx].attention.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -521,18 +524,6 @@ class AlbertModel(AlbertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import AlbertModel, AlbertTokenizer
|
||||
import torch
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = AlbertModel.from_pretrained('albert-base-v2')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
@@ -657,16 +648,16 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import AlbertTokenizer, AlbertForPreTraining
|
||||
import torch
|
||||
>>> from transformers import AlbertTokenizer, AlbertForPreTraining
|
||||
>>> import torch
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = AlbertForPreTraining.from_pretrained('albert-base-v2')
|
||||
>>> tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
>>> model = AlbertForPreTraining.from_pretrained('albert-base-v2')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
>>> outputs = model(input_ids)
|
||||
|
||||
prediction_scores, sop_scores = outputs[:2]
|
||||
>>> prediction_scores, sop_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
@@ -763,6 +754,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
|
||||
return self.predictions.decoder
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -802,18 +794,6 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Example::
|
||||
|
||||
from transformers import AlbertTokenizer, AlbertForMaskedLM
|
||||
import torch
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = AlbertForMaskedLM.from_pretrained('albert-base-v2')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
loss, prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
@@ -863,6 +843,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -899,19 +880,6 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import AlbertTokenizer, AlbertForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = AlbertForSequenceClassification.from_pretrained('albert-base-v2')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.albert(
|
||||
@@ -962,6 +930,7 @@ class AlbertForTokenClassification(AlbertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -996,21 +965,6 @@ class AlbertForTokenClassification(AlbertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import AlbertTokenizer, AlbertForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = AlbertForTokenClassification.from_pretrained('albert-base-v2')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.albert(
|
||||
@@ -1062,6 +1016,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1104,21 +1059,6 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
# The checkpoint albert-base-v2 is not fine-tuned for question answering. Please see the
|
||||
# examples/question-answering/run_squad.py example to see how to fine-tune a model to a question answering task.
|
||||
|
||||
from transformers import AlbertTokenizer, AlbertForQuestionAnswering
|
||||
import torch
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = AlbertForQuestionAnswering.from_pretrained('albert-base-v2')
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='pt')
|
||||
start_scores, end_scores = model(**input_dict)
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.albert(
|
||||
@@ -1176,6 +1116,7 @@ class AlbertForMultipleChoice(AlbertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1213,25 +1154,6 @@ class AlbertForMultipleChoice(AlbertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import AlbertTokenizer, AlbertForMultipleChoice
|
||||
import torch
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = AlbertForMultipleChoice.from_pretrained('albert-base-v2')
|
||||
|
||||
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
choice0 = "It is eaten with a fork and a knife."
|
||||
choice1 = "It is eaten while held in the hand."
|
||||
labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
|
||||
|
||||
encoding = tokenizer.batch_encode_plus([[prompt, choice0], [prompt, choice1]], return_tensors='pt', pad_to_max_length=True)
|
||||
outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
|
||||
|
||||
# the linear classifier still needs to be trained
|
||||
loss, logits = outputs[:2]
|
||||
"""
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
|
||||
|
||||
@@ -392,8 +392,8 @@ class AutoModel:
|
||||
|
||||
Examples::
|
||||
|
||||
config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache.
|
||||
model = AutoModel.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')`
|
||||
>>> config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache.
|
||||
>>> model = AutoModel.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')`
|
||||
"""
|
||||
for config_class, model_class in MODEL_MAPPING.items():
|
||||
if isinstance(config, config_class):
|
||||
@@ -480,8 +480,7 @@ class AutoModel:
|
||||
Examples::
|
||||
|
||||
model = AutoModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache.
|
||||
model = AutoModel.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
|
||||
assert model.config.output_attention == True
|
||||
assert model.config.output_attentions == True
|
||||
# Loading from a TF checkpoint file instead of a PyTorch model (slower)
|
||||
config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
|
||||
model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
|
||||
@@ -547,8 +546,8 @@ class AutoModelForPreTraining:
|
||||
|
||||
Examples::
|
||||
|
||||
config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache.
|
||||
model = AutoModelForPreTraining.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')`
|
||||
>>> config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache.
|
||||
>>> model = AutoModelForPreTraining.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')`
|
||||
"""
|
||||
for config_class, model_class in MODEL_FOR_PRETRAINING_MAPPING.items():
|
||||
if isinstance(config, config_class):
|
||||
|
||||
@@ -27,12 +27,19 @@ from torch.nn import CrossEntropyLoss
|
||||
|
||||
from .activations import ACT2FN
|
||||
from .configuration_bart import BartConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
add_code_sample_docstrings,
|
||||
add_end_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_utils import PreTrainedModel
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "BartTokenizer"
|
||||
|
||||
|
||||
BART_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"facebook/bart-large",
|
||||
@@ -56,14 +63,17 @@ BART_START_DOCSTRING = r"""
|
||||
|
||||
"""
|
||||
BART_GENERATION_EXAMPLE = r"""
|
||||
Examples::
|
||||
Summarization example::
|
||||
|
||||
from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig
|
||||
|
||||
# see ``examples/summarization/bart/run_eval.py`` for a longer example
|
||||
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
|
||||
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
|
||||
|
||||
ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
|
||||
inputs = tokenizer.batch_encode_plus([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
|
||||
inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
|
||||
|
||||
# Generate Summary
|
||||
summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, early_stopping=True)
|
||||
print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids])
|
||||
@@ -807,6 +817,7 @@ class BartModel(PretrainedBartModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="facebook/bart-large")
|
||||
def forward(
|
||||
self,
|
||||
input_ids,
|
||||
@@ -883,8 +894,7 @@ class BartModel(PretrainedBartModel):
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"The BART Model with a language modeling head. Can be used for summarization.",
|
||||
BART_START_DOCSTRING + BART_GENERATION_EXAMPLE,
|
||||
"The BART Model with a language modeling head. Can be used for summarization.", BART_START_DOCSTRING
|
||||
)
|
||||
class BartForConditionalGeneration(PretrainedBartModel):
|
||||
base_model_prefix = "model"
|
||||
@@ -911,6 +921,7 @@ class BartForConditionalGeneration(PretrainedBartModel):
|
||||
self.register_buffer("final_logits_bias", new_bias)
|
||||
|
||||
@add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
|
||||
@add_end_docstrings(BART_GENERATION_EXAMPLE)
|
||||
def forward(
|
||||
self,
|
||||
input_ids,
|
||||
@@ -951,18 +962,21 @@ class BartForConditionalGeneration(PretrainedBartModel):
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
Conditional generation example::
|
||||
|
||||
# Mask filling only works for bart-large
|
||||
from transformers import BartTokenizer, BartForConditionalGeneration
|
||||
tokenizer = BartTokenizer.from_pretrained('bart-large')
|
||||
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')
|
||||
TXT = "My friends are <mask> but they eat too many carbs."
|
||||
model = BartForConditionalGeneration.from_pretrained('bart-large')
|
||||
input_ids = tokenizer.batch_encode_plus([TXT], return_tensors='pt')['input_ids']
|
||||
|
||||
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large')
|
||||
input_ids = tokenizer([TXT], return_tensors='pt')['input_ids']
|
||||
logits = model(input_ids)[0]
|
||||
|
||||
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
|
||||
probs = logits[0, masked_index].softmax(dim=0)
|
||||
values, predictions = probs.topk(5)
|
||||
|
||||
tokenizer.decode(predictions).split()
|
||||
# ['good', 'great', 'all', 'really', 'very']
|
||||
"""
|
||||
@@ -1068,6 +1082,7 @@ class BartForSequenceClassification(PretrainedBartModel):
|
||||
self.model._init_weights(self.classification_head.out_proj)
|
||||
|
||||
@add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="facebook/bart-large")
|
||||
def forward(
|
||||
self,
|
||||
input_ids,
|
||||
@@ -1088,32 +1103,19 @@ class BartForSequenceClassification(PretrainedBartModel):
|
||||
|
||||
Returns:
|
||||
:obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BartConfig`) and inputs:
|
||||
loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`label` is provided):
|
||||
Classification loss (cross entropy)
|
||||
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
|
||||
Classification (or regression if config.num_labels==1) scores (before SoftMax).
|
||||
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
||||
Hidden-states of the model at the output of each layer plus the initial embedding outputs.
|
||||
attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
|
||||
Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BartTokenizer, BartForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = BartTokenizer.from_pretrained('bart-large')
|
||||
model = BartForSequenceClassification.from_pretrained('bart-large')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute",
|
||||
add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`label` is provided):
|
||||
Classification loss (cross entropy)
|
||||
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
|
||||
Classification (or regression if config.num_labels==1) scores (before SoftMax).
|
||||
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_hidden_states=True``):
|
||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
||||
Hidden-states of the model at the output of each layer plus the initial embedding outputs.
|
||||
attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or ``config.output_attentions=True``):
|
||||
Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the
|
||||
self-attention
|
||||
heads.
|
||||
"""
|
||||
if labels is not None:
|
||||
use_cache = False
|
||||
@@ -1161,6 +1163,7 @@ class BartForQuestionAnswering(PretrainedBartModel):
|
||||
self.model._init_weights(self.qa_outputs)
|
||||
|
||||
@add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="facebook/bart-large")
|
||||
def forward(
|
||||
self,
|
||||
input_ids,
|
||||
@@ -1200,25 +1203,6 @@ class BartForQuestionAnswering(PretrainedBartModel):
|
||||
Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
# The checkpoint bart-large is not fine-tuned for question answering. Please see the
|
||||
# examples/question-answering/run_squad.py example to see how to fine-tune a model to a question answering task.
|
||||
|
||||
from transformers import BartTokenizer, BartForQuestionAnswering
|
||||
import torch
|
||||
|
||||
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')
|
||||
model = BartForQuestionAnswering.from_pretrained('facebook/bart-large')
|
||||
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_ids = tokenizer.encode(question, text)
|
||||
start_scores, end_scores = model(torch.tensor([input_ids]))
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
|
||||
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
|
||||
|
||||
"""
|
||||
if start_positions is not None and end_positions is not None:
|
||||
use_cache = False
|
||||
@@ -1259,7 +1243,7 @@ class BartForQuestionAnswering(PretrainedBartModel):
|
||||
total_loss = (start_loss + end_loss) / 2
|
||||
outputs = (total_loss,) + outputs
|
||||
|
||||
return outputs # (loss), start_logits, end_logits, (hidden_states), (attentions)
|
||||
return outputs # (loss), start_logits, end_logits, encoder_outputs, (hidden_states), (attentions)
|
||||
|
||||
|
||||
class SinusoidalPositionalEmbedding(nn.Embedding):
|
||||
|
||||
@@ -28,12 +28,14 @@ from torch.nn import CrossEntropyLoss, MSELoss
|
||||
|
||||
from .activations import gelu, gelu_new, swish
|
||||
from .configuration_bert import BertConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "BertTokenizer"
|
||||
|
||||
BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"bert-base-uncased",
|
||||
"bert-large-uncased",
|
||||
@@ -664,6 +666,7 @@ class BertModel(BertPreTrainedModel):
|
||||
self.encoder.layer[layer].attention.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -702,20 +705,6 @@ class BertModel(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertModel, BertTokenizer
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertModel.from_pretrained('bert-base-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -851,16 +840,16 @@ class BertForPreTraining(BertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForPreTraining
|
||||
import torch
|
||||
>>> from transformers import BertTokenizer, BertForPreTraining
|
||||
>>> import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForPreTraining.from_pretrained('bert-base-uncased')
|
||||
>>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
>>> model = BertForPreTraining.from_pretrained('bert-base-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
prediction_scores, seq_relationship_scores = outputs[:2]
|
||||
>>> prediction_scores, seq_relationship_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
@@ -958,19 +947,20 @@ class BertLMHeadModel(BertPreTrainedModel):
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
Example::
|
||||
|
||||
from transformers import BertTokenizer, BertLMHeadModel
|
||||
import torch
|
||||
>>> from transformers import BertTokenizer, BertLMHeadModel, BertConfig
|
||||
>>> import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertLMHeadModel.from_pretrained('bert-base-uncased', is_decoder=True)
|
||||
>>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
|
||||
>>> config = BertConfig.from_pretrained("bert-base-cased")
|
||||
>>> config.is_decoder = True
|
||||
>>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config)
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
|
||||
loss, prediction_scores = outputs[:2]
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
|
||||
outputs = self.bert(
|
||||
@@ -1028,6 +1018,7 @@ class BertForMaskedLM(BertPreTrainedModel):
|
||||
return self.cls.predictions.decoder
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1069,20 +1060,6 @@ class BertForMaskedLM(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForMaskedLM
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
|
||||
loss, prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
@@ -1185,18 +1162,18 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForNextSentencePrediction
|
||||
import torch
|
||||
>>> from transformers import BertTokenizer, BertForNextSentencePrediction
|
||||
>>> import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
|
||||
>>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
>>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
|
||||
|
||||
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
next_sentence = "The sky is blue due to the shorter wavelength of blue light."
|
||||
encoding = tokenizer.encode_plus(prompt, next_sentence, return_tensors='pt')
|
||||
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
|
||||
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
|
||||
|
||||
loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1]))
|
||||
assert logits[0, 0] < logits[0, 1] # next sentence was random
|
||||
>>> loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1]))
|
||||
>>> assert logits[0, 0] < logits[0, 1] # next sentence was random
|
||||
"""
|
||||
|
||||
outputs = self.bert(
|
||||
@@ -1240,6 +1217,7 @@ class BertForSequenceClassification(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1276,21 +1254,6 @@ class BertForSequenceClassification(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.bert(
|
||||
@@ -1340,6 +1303,7 @@ class BertForMultipleChoice(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1377,25 +1341,6 @@ class BertForMultipleChoice(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForMultipleChoice
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
|
||||
|
||||
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
choice0 = "It is eaten with a fork and a knife."
|
||||
choice1 = "It is eaten while held in the hand."
|
||||
labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
|
||||
|
||||
encoding = tokenizer.batch_encode_plus([[prompt, choice0], [prompt, choice1]], return_tensors='pt', pad_to_max_length=True)
|
||||
outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
|
||||
|
||||
# the linear classifier still needs to be trained
|
||||
loss, logits = outputs[:2]
|
||||
"""
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
|
||||
@@ -1453,6 +1398,7 @@ class BertForTokenClassification(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1487,21 +1433,6 @@ class BertForTokenClassification(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForTokenClassification.from_pretrained('bert-base-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.bert(
|
||||
@@ -1554,6 +1485,7 @@ class BertForQuestionAnswering(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1596,25 +1528,6 @@ class BertForQuestionAnswering(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForQuestionAnswering
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
|
||||
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
encoding = tokenizer.encode_plus(question, text)
|
||||
input_ids, token_type_ids = encoding["input_ids"], encoding["token_type_ids"]
|
||||
start_scores, end_scores = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([token_type_ids]))
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
|
||||
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
|
||||
|
||||
assert answer == "a nice puppet"
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.bert(
|
||||
|
||||
@@ -31,6 +31,8 @@ from .modeling_roberta import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "CamembertTokenizer"
|
||||
|
||||
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"camembert-base",
|
||||
"Musixmatch/umberto-commoncrawl-cased-v1",
|
||||
|
||||
@@ -24,12 +24,14 @@ import torch.nn as nn
|
||||
from torch.nn import CrossEntropyLoss
|
||||
|
||||
from .configuration_ctrl import CTRLConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_utils import Conv1D, PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "CTRLTokenizer"
|
||||
|
||||
CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"ctrl"
|
||||
# See all CTRL models at https://huggingface.co/models?filter=ctrl
|
||||
@@ -326,6 +328,7 @@ class CTRLModel(CTRLPreTrainedModel):
|
||||
self.h[layer].multi_head_attention.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="ctrl")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -358,20 +361,6 @@ class CTRLModel(CTRLPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import CTRLTokenizer, CTRLModel
|
||||
import torch
|
||||
|
||||
tokenizer = CTRLTokenizer.from_pretrained('ctrl')
|
||||
model = CTRLModel.from_pretrained('ctrl')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||
@@ -510,6 +499,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
|
||||
return {"input_ids": input_ids, "past": past, "use_cache": kwargs["use_cache"]}
|
||||
|
||||
@add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="ctrl")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -552,19 +542,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import torch
|
||||
from transformers import CTRLTokenizer, CTRLLMHeadModel
|
||||
|
||||
tokenizer = CTRLTokenizer.from_pretrained('ctrl')
|
||||
model = CTRLLMHeadModel.from_pretrained('ctrl')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
|
||||
@@ -30,12 +30,13 @@ from torch.nn import CrossEntropyLoss
|
||||
|
||||
from .activations import gelu
|
||||
from .configuration_distilbert import DistilBertConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "DistilBertTokenizer"
|
||||
|
||||
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"distilbert-base-uncased",
|
||||
@@ -409,6 +410,7 @@ class DistilBertModel(DistilBertPreTrainedModel):
|
||||
self.transformer.layer[layer].attention.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -434,20 +436,6 @@ class DistilBertModel(DistilBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import DistilBertTokenizer, DistilBertModel
|
||||
import torch
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = DistilBertModel.from_pretrained('distilbert-base-cased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -506,6 +494,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
|
||||
return self.vocab_projector
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -544,17 +533,6 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import DistilBertTokenizer, DistilBertForMaskedLM
|
||||
import torch
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = DistilBertForMaskedLM.from_pretrained('distilbert-base-cased')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
loss, prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
@@ -604,6 +582,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -639,18 +618,6 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-cased')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
distilbert_output = self.distilbert(
|
||||
input_ids=input_ids,
|
||||
@@ -697,6 +664,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -737,20 +705,6 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
|
||||
import torch
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-cased')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
start_positions = torch.tensor([1])
|
||||
end_positions = torch.tensor([3])
|
||||
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||
loss, start_scores, end_scores = outputs[:3]
|
||||
|
||||
"""
|
||||
distilbert_output = self.distilbert(
|
||||
input_ids=input_ids,
|
||||
@@ -806,6 +760,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -838,19 +793,6 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import DistilBertTokenizer, DistilBertForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = DistilBertForTokenClassification.from_pretrained('distilbert-base-cased')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.distilbert(
|
||||
@@ -940,22 +882,23 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import DistilBertTokenizer, DistilBertForMultipleChoice
|
||||
import torch
|
||||
>>> from transformers import DistilBertTokenizer, DistilBertForMultipleChoice
|
||||
>>> import torch
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased')
|
||||
>>> tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
>>> model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased')
|
||||
|
||||
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
choice0 = "It is eaten with a fork and a knife."
|
||||
choice1 = "It is eaten while held in the hand."
|
||||
labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
|
||||
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
>>> choice0 = "It is eaten with a fork and a knife."
|
||||
>>> choice1 = "It is eaten while held in the hand."
|
||||
>>> labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
|
||||
|
||||
encoding = tokenizer.batch_encode_plus([[prompt, choice0], [prompt, choice1]], return_tensors='pt', pad_to_max_length=True)
|
||||
outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
|
||||
>>> encoding = tokenizer.batch_encode_plus([[prompt, choice0], [prompt, choice1]], return_tensors='pt', pad_to_max_length=True)
|
||||
>>> outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
|
||||
|
||||
>>> # the linear classifier still needs to be trained
|
||||
>>> loss, logits = outputs[:2]
|
||||
|
||||
# the linear classifier still needs to be trained
|
||||
loss, logits = outputs[:2]
|
||||
"""
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
|
||||
|
||||
@@ -8,13 +8,14 @@ from torch.nn import CrossEntropyLoss, MSELoss
|
||||
|
||||
from .activations import get_activation
|
||||
from .configuration_electra import ElectraConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_bert import BertEmbeddings, BertEncoder, BertLayerNorm, BertPreTrainedModel
|
||||
from .modeling_utils import SequenceSummary
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "ElectraTokenizer"
|
||||
|
||||
ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"google/electra-small-generator",
|
||||
@@ -264,6 +265,7 @@ class ElectraModel(ElectraPreTrainedModel):
|
||||
self.encoder.layer[layer].attention.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -291,20 +293,6 @@ class ElectraModel(ElectraPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import ElectraModel, ElectraTokenizer
|
||||
import torch
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
|
||||
model = ElectraModel.from_pretrained('google/electra-small-discriminator')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
@@ -383,6 +371,7 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -419,21 +408,6 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = ElectraForSequenceClassification.from_pretrained('bert-base-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
discriminator_hidden_states = self.electra(
|
||||
input_ids,
|
||||
@@ -521,16 +495,14 @@ class ElectraForPreTraining(ElectraPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import ElectraTokenizer, ElectraForPreTraining
|
||||
import torch
|
||||
>>> from transformers import ElectraTokenizer, ElectraForPreTraining
|
||||
>>> import torch
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
|
||||
model = ElectraForPreTraining.from_pretrained('google/electra-small-discriminator')
|
||||
>>> tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
|
||||
>>> model = ElectraForPreTraining.from_pretrained('google/electra-small-discriminator')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
|
||||
prediction_scores, seq_relationship_scores = outputs[:2]
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
>>> scores = model(input_ids)[0]
|
||||
|
||||
"""
|
||||
|
||||
@@ -589,6 +561,7 @@ class ElectraForMaskedLM(ElectraPreTrainedModel):
|
||||
return self.generator_lm_head
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-generator")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -628,20 +601,6 @@ class ElectraForMaskedLM(ElectraPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import ElectraTokenizer, ElectraForMaskedLM
|
||||
import torch
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-generator')
|
||||
model = ElectraForMaskedLM.from_pretrained('google/electra-small-generator')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
|
||||
loss, prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
@@ -696,6 +655,7 @@ class ElectraForTokenClassification(ElectraPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -730,21 +690,6 @@ class ElectraForTokenClassification(ElectraPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import ElectraTokenizer, ElectraForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
|
||||
model = ElectraForTokenClassification.from_pretrained('google/electra-small-discriminator')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
discriminator_hidden_states = self.electra(
|
||||
@@ -802,6 +747,7 @@ class ElectraForQuestionAnswering(ElectraPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -844,23 +790,6 @@ class ElectraForQuestionAnswering(ElectraPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import ElectraTokenizer, ElectraForQuestionAnswering
|
||||
import torch
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')
|
||||
model = ElectraForQuestionAnswering.from_pretrained('google/electra-base-discriminator')
|
||||
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
encoding = tokenizer.encode_plus(question, text, return_tensors='pt')
|
||||
input_ids, token_type_ids = encoding['input_ids'], encoding['token_type_ids']
|
||||
start_scores, end_scores = model(input_ids, token_type_ids=token_type_ids)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze(0))
|
||||
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
|
||||
|
||||
"""
|
||||
|
||||
discriminator_hidden_states = self.electra(
|
||||
@@ -918,6 +847,7 @@ class ElectraForMultipleChoice(ElectraPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -954,25 +884,6 @@ class ElectraForMultipleChoice(ElectraPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import ElectraTokenizer, ElectraForMultipleChoice
|
||||
import torch
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')
|
||||
model = ElectraForMultipleChoice.from_pretrained('google/electra-base-discriminator')
|
||||
|
||||
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
choice0 = "It is eaten with a fork and a knife."
|
||||
choice1 = "It is eaten while held in the hand."
|
||||
labels = torch.tensor(0) # choice0 is correct (according to Wikipedia ;))
|
||||
|
||||
encoding = tokenizer.batch_encode_plus([[prompt, choice0], [prompt, choice1]], return_tensors='pt', pad_to_max_length=True)
|
||||
outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
|
||||
|
||||
# the linear classifier still needs to be trained
|
||||
loss, logits = outputs[:2]
|
||||
"""
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
|
||||
|
||||
@@ -126,9 +126,8 @@ class EncoderDecoderModel(PreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import EncoderDecoder
|
||||
|
||||
model = EncoderDecoder.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert
|
||||
>>> from transformers import EncoderDecoderModel
|
||||
>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert
|
||||
"""
|
||||
|
||||
kwargs_encoder = {
|
||||
@@ -244,21 +243,21 @@ class EncoderDecoderModel(PreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import EncoderDecoderModel, BertTokenizer
|
||||
import torch
|
||||
>>> from transformers import EncoderDecoderModel, BertTokenizer
|
||||
>>> import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert
|
||||
>>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert
|
||||
|
||||
# forward
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
|
||||
>>> # forward
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
|
||||
|
||||
# training
|
||||
loss, outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, lm_labels=input_ids)[:2]
|
||||
>>> # training
|
||||
>>> loss, outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)[:2]
|
||||
|
||||
# generation
|
||||
generated = model.generate(input_ids, decoder_start_token_id=model.config.decoder.pad_token_id)
|
||||
>>> # generation
|
||||
>>> generated = model.generate(input_ids, decoder_start_token_id=model.config.decoder.pad_token_id)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ import torch
|
||||
from torch.nn import functional as F
|
||||
|
||||
from .configuration_flaubert import FlaubertConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_xlm import (
|
||||
XLMForQuestionAnswering,
|
||||
XLMForQuestionAnsweringSimple,
|
||||
@@ -35,6 +35,8 @@ from .modeling_xlm import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "FlaubertTokenizer"
|
||||
|
||||
FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"flaubert/flaubert_small_cased",
|
||||
"flaubert/flaubert_base_uncased",
|
||||
@@ -119,6 +121,7 @@ class FlaubertModel(XLMModel):
|
||||
self.pre_norm = getattr(config, "pre_norm", False)
|
||||
|
||||
@add_start_docstrings_to_callable(FLAUBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="flaubert/flaubert_base_cased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -149,18 +152,6 @@ class FlaubertModel(XLMModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import FlaubertTokenizer, FlaubertModel
|
||||
import torch
|
||||
|
||||
tokenizer = FlaubertTokenizer.from_pretrained('flaubert-base-cased')
|
||||
model = FlaubertModel.from_pretrained('flaubert-base-cased')
|
||||
input_ids = torch.tensor(tokenizer.encode("Le chat mange une pomme.", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
|
||||
@@ -26,7 +26,7 @@ from torch.nn import CrossEntropyLoss
|
||||
|
||||
from .activations import ACT2FN
|
||||
from .configuration_gpt2 import GPT2Config
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_utils import (
|
||||
Conv1D,
|
||||
PreTrainedModel,
|
||||
@@ -38,6 +38,8 @@ from .modeling_utils import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "GPT2Tokenizer"
|
||||
|
||||
GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"gpt2",
|
||||
"gpt2-medium",
|
||||
@@ -370,6 +372,7 @@ class GPT2Model(GPT2PreTrainedModel):
|
||||
self.h[layer].attn.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="gpt2")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -403,18 +406,6 @@ class GPT2Model(GPT2PreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import GPT2Tokenizer, GPT2Model
|
||||
import torch
|
||||
|
||||
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
||||
model = GPT2Model.from_pretrained('gpt2')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -553,6 +544,7 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
||||
return {"input_ids": input_ids, "past": past, "use_cache": kwargs["use_cache"]}
|
||||
|
||||
@add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="gpt2")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -595,19 +587,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import torch
|
||||
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
||||
|
||||
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
||||
model = GPT2LMHeadModel.from_pretrained('gpt2')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
@@ -721,26 +700,26 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
import torch
|
||||
from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel
|
||||
>>> import torch
|
||||
>>> from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel
|
||||
|
||||
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
||||
model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
|
||||
>>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
||||
>>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
|
||||
|
||||
# Add a [CLS] to the vocabulary (we should train it also!)
|
||||
tokenizer.add_special_tokens({'cls_token': '[CLS]'})
|
||||
model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
|
||||
print(tokenizer.cls_token_id, len(tokenizer)) # The newly added token is the last token of the vocabulary
|
||||
>>> # Add a [CLS] to the vocabulary (we should train it also!)
|
||||
>>> num_added_tokens = tokenizer.add_special_tokens({'cls_token': '[CLS]'})
|
||||
|
||||
choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
||||
encoded_choices = [tokenizer.encode(s) for s in choices]
|
||||
cls_token_location = [tokens.index(tokenizer.cls_token_id) for tokens in encoded_choices]
|
||||
>>> embedding_layer = model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
|
||||
|
||||
input_ids = torch.tensor(encoded_choices).unsqueeze(0) # Batch size: 1, number of choices: 2
|
||||
mc_token_ids = torch.tensor([cls_token_location]) # Batch size: 1
|
||||
>>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
||||
>>> encoded_choices = [tokenizer.encode(s) for s in choices]
|
||||
>>> cls_token_location = [tokens.index(tokenizer.cls_token_id) for tokens in encoded_choices]
|
||||
|
||||
outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||
lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
||||
>>> input_ids = torch.tensor(encoded_choices).unsqueeze(0) # Batch size: 1, number of choices: 2
|
||||
>>> mc_token_ids = torch.tensor([cls_token_location]) # Batch size: 1
|
||||
|
||||
>>> outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||
>>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if "lm_labels" in kwargs:
|
||||
|
||||
@@ -24,13 +24,15 @@ from torch.nn import CrossEntropyLoss, MSELoss
|
||||
from torch.nn import functional as F
|
||||
|
||||
from .configuration_longformer import LongformerConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_bert import BertPreTrainedModel
|
||||
from .modeling_roberta import RobertaLMHead, RobertaModel
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "LongformerTokenizer"
|
||||
|
||||
LONGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"allenai/longformer-base-4096",
|
||||
"allenai/longformer-large-4096",
|
||||
@@ -609,22 +611,22 @@ class LongformerModel(RobertaModel):
|
||||
|
||||
Examples::
|
||||
|
||||
import torch
|
||||
from transformers import LongformerModel, LongformerTokenizer
|
||||
>>> import torch
|
||||
>>> from transformers import LongformerModel, LongformerTokenizer
|
||||
|
||||
model = LongformerModel.from_pretrained('allenai/longformer-base-4096')
|
||||
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
|
||||
>>> model = LongformerModel.from_pretrained('allenai/longformer-base-4096')
|
||||
>>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
|
||||
|
||||
SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
|
||||
input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1
|
||||
>>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
|
||||
>>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1
|
||||
|
||||
# Attention mask values -- 0: no attention, 1: local attention, 2: global attention
|
||||
attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to local attention
|
||||
attention_mask[:, [1, 4, 21,]] = 2 # Set global attention based on the task. For example,
|
||||
# classification: the <s> token
|
||||
# QA: question tokens
|
||||
# LM: potentially on the beginning of sentences and paragraphs
|
||||
sequence_output, pooled_output = model(input_ids, attention_mask=attention_mask)
|
||||
>>> # Attention mask values -- 0: no attention, 1: local attention, 2: global attention
|
||||
>>> attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to local attention
|
||||
>>> attention_mask[:, [1, 4, 21,]] = 2 # Set global attention based on the task. For example,
|
||||
... # classification: the <s> token
|
||||
... # QA: question tokens
|
||||
... # LM: potentially on the beginning of sentences and paragraphs
|
||||
>>> sequence_output, pooled_output = model(input_ids, attention_mask=attention_mask)
|
||||
"""
|
||||
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
@@ -743,18 +745,18 @@ class LongformerForMaskedLM(BertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
import torch
|
||||
from transformers import LongformerForMaskedLM, LongformerTokenizer
|
||||
>>> import torch
|
||||
>>> from transformers import LongformerForMaskedLM, LongformerTokenizer
|
||||
|
||||
model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096')
|
||||
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
|
||||
>>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096')
|
||||
>>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
|
||||
|
||||
SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
|
||||
input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1
|
||||
>>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
|
||||
>>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1
|
||||
|
||||
attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM
|
||||
# check ``LongformerModel.forward`` for more details on how to set `attention_mask`
|
||||
loss, prediction_scores = model(input_ids, attention_mask=attention_mask, labels=input_ids)
|
||||
>>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM
|
||||
... # check ``LongformerModel.forward`` for more details on how to set `attention_mask`
|
||||
>>> loss, prediction_scores = model(input_ids, attention_mask=attention_mask, labels=input_ids)
|
||||
"""
|
||||
|
||||
if "masked_lm_labels" in kwargs:
|
||||
@@ -807,6 +809,7 @@ class LongformerForSequenceClassification(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="allenai/longformer-base-4096")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -843,19 +846,6 @@ class LongformerForSequenceClassification(BertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import LongformerTokenizer, LongformerForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
|
||||
model = LongformerForSequenceClassification.from_pretrained('allenai/longformer-base-4096')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
if global_attention_mask is None:
|
||||
@@ -973,25 +963,25 @@ class LongformerForQuestionAnswering(BertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import LongformerTokenizer, LongformerForQuestionAnswering
|
||||
import torch
|
||||
>>> from transformers import LongformerTokenizer, LongformerForQuestionAnswering
|
||||
>>> import torch
|
||||
|
||||
tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
|
||||
model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
|
||||
>>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
|
||||
>>> model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
|
||||
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
encoding = tokenizer.encode_plus(question, text, return_tensors="pt")
|
||||
input_ids = encoding["input_ids"]
|
||||
>>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
>>> encoding = tokenizer.encode_plus(question, text, return_tensors="pt")
|
||||
>>> input_ids = encoding["input_ids"]
|
||||
|
||||
# default is local attention everywhere
|
||||
# the forward method will automatically set global attention on question tokens
|
||||
attention_mask = encoding["attention_mask"]
|
||||
>>> # default is local attention everywhere
|
||||
>>> # the forward method will automatically set global attention on question tokens
|
||||
>>> attention_mask = encoding["attention_mask"]
|
||||
|
||||
start_scores, end_scores = model(input_ids, attention_mask=attention_mask)
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
|
||||
>>> start_scores, end_scores = model(input_ids, attention_mask=attention_mask)
|
||||
>>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
|
||||
|
||||
answer_tokens = all_tokens[torch.argmax(start_scores) :torch.argmax(end_scores)+1]
|
||||
answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token
|
||||
>>> answer_tokens = all_tokens[torch.argmax(start_scores) :torch.argmax(end_scores)+1]
|
||||
>>> answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token
|
||||
|
||||
"""
|
||||
|
||||
@@ -1060,6 +1050,7 @@ class LongformerForTokenClassification(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="allenai/longformer-base-4096")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1094,19 +1085,6 @@ class LongformerForTokenClassification(BertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import LongformerTokenizer, LongformerForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
|
||||
model = LongformerForTokenClassification.from_pretrained('allenai/longformer-base-4096')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.longformer(
|
||||
@@ -1163,6 +1141,7 @@ class LongformerForMultipleChoice(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="allenai/longformer-base-4096")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1200,23 +1179,6 @@ class LongformerForMultipleChoice(BertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import LongformerTokenizer, LongformerForMultipleChoice
|
||||
import torch
|
||||
|
||||
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
|
||||
model = LongformerForMultipleChoice.from_pretrained('allenai/longformer-base-4096')
|
||||
# context = "The dog is cute" | choice = "the dog" / "the cat"
|
||||
choices = [("The dog is cute", "the dog"), ("The dog is cute", "the cat")]
|
||||
input_ids = torch.tensor([tokenizer.encode(s[0], s[1], add_special_tokens=True) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
|
||||
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
|
||||
|
||||
# global attention is automatically put on "the dog" and "the cat"
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
|
||||
|
||||
@@ -31,18 +31,18 @@ class MarianMTModel(BartForConditionalGeneration):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import MarianTokenizer, MarianMTModel
|
||||
from typing import List
|
||||
src = 'fr' # source language
|
||||
trg = 'en' # target language
|
||||
sample_text = "où est l'arrêt de bus ?"
|
||||
mname = f'Helsinki-NLP/opus-mt-{src}-{trg}'
|
||||
>>> from transformers import MarianTokenizer, MarianMTModel
|
||||
>>> from typing import List
|
||||
>>> src = 'fr' # source language
|
||||
>>> trg = 'en' # target language
|
||||
>>> sample_text = "où est l'arrêt de bus ?"
|
||||
>>> mname = f'Helsinki-NLP/opus-mt-{src}-{trg}'
|
||||
|
||||
model = MarianMTModel.from_pretrained(mname)
|
||||
tok = MarianTokenizer.from_pretrained(mname)
|
||||
batch = tok.prepare_translation_batch(src_texts=[sample_text]) # don't need tgt_text for inference
|
||||
gen = model.generate(**batch) # for forward pass: model(**batch)
|
||||
words: List[str] = tok.batch_decode(gen, skip_special_tokens=True) # returns "Where is the the bus stop ?"
|
||||
>>> model = MarianMTModel.from_pretrained(mname)
|
||||
>>> tok = MarianTokenizer.from_pretrained(mname)
|
||||
>>> batch = tok.prepare_translation_batch(src_texts=[sample_text]) # don't need tgt_text for inference
|
||||
>>> gen = model.generate(**batch) # for forward pass: model(**batch)
|
||||
>>> words: List[str] = tok.batch_decode(gen, skip_special_tokens=True) # returns "Where is the the bus stop ?"
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -34,11 +34,14 @@ from transformers.modeling_bert import BertIntermediate
|
||||
|
||||
from .activations import gelu, gelu_new, swish
|
||||
from .configuration_mobilebert import MobileBertConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
|
||||
|
||||
MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["google/mobilebert-uncased"]
|
||||
|
||||
|
||||
@@ -747,6 +750,7 @@ class MobileBertModel(MobileBertPreTrainedModel):
|
||||
self.encoder.layer[layer].attention.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -785,20 +789,6 @@ class MobileBertModel(MobileBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import MobileBertModel, MobileBertTokenizer
|
||||
import torch
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained(model_name_or_path)
|
||||
model = MobileBertModel.from_pretrained(model_name_or_path)
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
@@ -951,13 +941,17 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
from transformers import MobileBertTokenizer, MobileBertForPreTraining
|
||||
import torch
|
||||
tokenizer = MobileBertTokenizer.from_pretrained(model_name_or_path)
|
||||
model = MobileBertForPreTraining.from_pretrained(model_name_or_path)
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores, seq_relationship_scores = outputs[:2]
|
||||
|
||||
>>> from transformers import MobileBertTokenizer, MobileBertForPreTraining
|
||||
>>> import torch
|
||||
|
||||
>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
|
||||
>>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
|
||||
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
>>> outputs = model(input_ids)
|
||||
|
||||
>>> prediction_scores, seq_relationship_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
outputs = self.mobilebert(
|
||||
@@ -1022,6 +1016,7 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
|
||||
self._tie_or_clone_weights(output_embeddings, self.get_input_embeddings())
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1063,20 +1058,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import MobileBertTokenizer, MobileBertForMaskedLM
|
||||
import torch
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = MobileBertForMaskedLM.from_pretrained('mobilebert-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
|
||||
loss, prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
@@ -1174,18 +1155,17 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import MobileBertTokenizer, MobileBertForNextSentencePrediction
|
||||
import torch
|
||||
>>> from transformers import MobileBertTokenizer, MobileBertForNextSentencePrediction
|
||||
>>> import torch
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = MobileBertForNextSentencePrediction.from_pretrained('mobilebert-uncased')
|
||||
>>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
|
||||
>>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased')
|
||||
|
||||
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
next_sentence = "The sky is blue due to the shorter wavelength of blue light."
|
||||
encoding = tokenizer.encode_plus(prompt, next_sentence, return_tensors='pt')
|
||||
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
|
||||
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
|
||||
|
||||
loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1]))
|
||||
assert logits[0, 0] < logits[0, 1] # next sentence was random
|
||||
>>> loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1]))
|
||||
"""
|
||||
|
||||
outputs = self.mobilebert(
|
||||
@@ -1228,6 +1208,7 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1263,20 +1244,6 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import BertTokenizer, BertForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, logits = outputs[:2]
|
||||
"""
|
||||
|
||||
outputs = self.mobilebert(
|
||||
@@ -1321,6 +1288,7 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1363,25 +1331,6 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import MobileBertTokenizer, MobileBertForQuestionAnswering
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained(model_name_or_path)
|
||||
model = MobileBertForQuestionAnswering.from_pretrained(model_name_or_path)
|
||||
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
encoding = tokenizer.encode_plus(question, text)
|
||||
input_ids, token_type_ids = encoding["input_ids"], encoding["token_type_ids"]
|
||||
start_scores, end_scores = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([token_type_ids]))
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
|
||||
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
|
||||
|
||||
assert answer == "a nice puppet"
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.mobilebert(
|
||||
@@ -1439,6 +1388,7 @@ class MobileBertForMultipleChoice(MobileBertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1476,25 +1426,6 @@ class MobileBertForMultipleChoice(MobileBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import MobileBertTokenizer, MobileBertForMultipleChoice
|
||||
import torch
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = MobileBertForMultipleChoice.from_pretrained('mobilebert-uncased')
|
||||
|
||||
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
choice0 = "It is eaten with a fork and a knife."
|
||||
choice1 = "It is eaten while held in the hand."
|
||||
labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
|
||||
|
||||
encoding = tokenizer.batch_encode_plus([[prompt, choice0], [prompt, choice1]], return_tensors='pt', pad_to_max_length=True)
|
||||
outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
|
||||
|
||||
# the linear classifier still needs to be trained
|
||||
loss, logits = outputs[:2]
|
||||
"""
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
|
||||
@@ -1552,6 +1483,7 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1586,21 +1518,6 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import MobileBertTokenizer, MobileBertForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = MobileBertForTokenClassification.from_pretrained('mobilebert-uncased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.mobilebert(
|
||||
|
||||
@@ -28,7 +28,7 @@ from torch.nn import CrossEntropyLoss
|
||||
|
||||
from .activations import gelu_new, swish
|
||||
from .configuration_openai import OpenAIGPTConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_utils import (
|
||||
Conv1D,
|
||||
PreTrainedModel,
|
||||
@@ -40,6 +40,8 @@ from .modeling_utils import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "OpenAIGPTTokenizer"
|
||||
|
||||
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"openai-gpt",
|
||||
# See all OpenAI GPT models at https://huggingface.co/models?filter=openai-gpt
|
||||
@@ -356,6 +358,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
|
||||
self.h[layer].attn.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -383,18 +386,6 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import OpenAIGPTTokenizer, OpenAIGPTModel
|
||||
import torch
|
||||
|
||||
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
|
||||
model = OpenAIGPTModel.from_pretrained('openai-gpt')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -490,6 +481,7 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
|
||||
return self.lm_head
|
||||
|
||||
@add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -531,18 +523,6 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
|
||||
import torch
|
||||
|
||||
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
|
||||
model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
|
||||
@@ -29,12 +29,20 @@ from torch.nn import CrossEntropyLoss
|
||||
|
||||
from .activations import gelu, gelu_fast, gelu_new, swish
|
||||
from .configuration_reformer import ReformerConfig
|
||||
from .file_utils import DUMMY_INPUTS, DUMMY_MASK, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
DUMMY_INPUTS,
|
||||
DUMMY_MASK,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_utils import PreTrainedModel, apply_chunking_to_forward
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "ReformerTokenizer"
|
||||
|
||||
REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"google/reformer-crime-and-punishment",
|
||||
"google/reformer-enwik8",
|
||||
@@ -1543,6 +1551,7 @@ class ReformerModel(ReformerPreTrainedModel):
|
||||
self.encoder.layer[layer].attention.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1570,19 +1579,6 @@ class ReformerModel(ReformerPreTrainedModel):
|
||||
|
||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import ReformerModel, ReformerTokenizer
|
||||
import torch
|
||||
|
||||
tokenizer = ReformerTokenizer.from_pretrained('google/reformer-crime-and-punishment')
|
||||
model = ReformerModel.from_pretrained('google/reformer-crime-and-punishment')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
@@ -1738,6 +1734,7 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel):
|
||||
pass
|
||||
|
||||
@add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1774,19 +1771,6 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import ReformerModelWithLMHead, ReformerTokenizer
|
||||
import torch
|
||||
|
||||
tokenizer = ReformerTokenizer.from_pretrained('google/reformer-crime-and-punishment')
|
||||
model = ReformerModelWithLMHead.from_pretrained('google/reformer-crime-and-punishment')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
|
||||
loss, prediction_scores = outputs[:2]
|
||||
"""
|
||||
|
||||
reformer_outputs = self.reformer(
|
||||
|
||||
@@ -24,12 +24,14 @@ import torch.nn as nn
|
||||
from torch.nn import CrossEntropyLoss, MSELoss
|
||||
|
||||
from .configuration_roberta import RobertaConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_bert import BertEmbeddings, BertLayerNorm, BertModel, BertPreTrainedModel, gelu
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "RobertaTokenizer"
|
||||
|
||||
ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"roberta-base",
|
||||
"roberta-large",
|
||||
@@ -177,6 +179,7 @@ class RobertaForMaskedLM(BertPreTrainedModel):
|
||||
return self.lm_head.decoder
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -216,18 +219,6 @@ class RobertaForMaskedLM(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import RobertaTokenizer, RobertaForMaskedLM
|
||||
import torch
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = RobertaForMaskedLM.from_pretrained('roberta-base')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=input_ids)
|
||||
loss, prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
@@ -304,6 +295,7 @@ class RobertaForSequenceClassification(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -340,19 +332,6 @@ class RobertaForSequenceClassification(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = RobertaForSequenceClassification.from_pretrained('roberta-base')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
outputs = self.roberta(
|
||||
input_ids,
|
||||
@@ -400,6 +379,7 @@ class RobertaForMultipleChoice(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -437,20 +417,6 @@ class RobertaForMultipleChoice(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import RobertaTokenizer, RobertaForMultipleChoice
|
||||
import torch
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = RobertaForMultipleChoice.from_pretrained('roberta-base')
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
input_ids = torch.tensor([tokenizer.encode(s, add_special_tokens=True) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
|
||||
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
|
||||
@@ -510,6 +476,7 @@ class RobertaForTokenClassification(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -544,19 +511,6 @@ class RobertaForTokenClassification(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import RobertaTokenizer, RobertaForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = RobertaForTokenClassification.from_pretrained('roberta-base')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.roberta(
|
||||
@@ -632,6 +586,7 @@ class RobertaForQuestionAnswering(BertPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -674,25 +629,6 @@ class RobertaForQuestionAnswering(BertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
# The checkpoint roberta-large is not fine-tuned for question answering. Please see the
|
||||
# examples/question-answering/run_squad.py example to see how to fine-tune a model to a question answering task.
|
||||
|
||||
from transformers import RobertaTokenizer, RobertaForQuestionAnswering
|
||||
import torch
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = RobertaForQuestionAnswering.from_pretrained('roberta-base')
|
||||
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_ids = tokenizer.encode(question, text)
|
||||
start_scores, end_scores = model(torch.tensor([input_ids]))
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
|
||||
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.roberta(
|
||||
|
||||
@@ -33,6 +33,8 @@ from .modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, p
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "T5Tokenizer"
|
||||
|
||||
####################################################
|
||||
# This dict contains shortcut names and associated url
|
||||
# for the pretrained weights provided with the models
|
||||
@@ -924,16 +926,17 @@ class T5Model(T5PreTrainedModel):
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
Example::
|
||||
|
||||
from transformers import T5Tokenizer, T5Model
|
||||
>>> from transformers import T5Tokenizer, T5Model
|
||||
|
||||
tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
model = T5Model.from_pretrained('t5-small')
|
||||
input_ids = tokenizer.encode("Hello, my dog is cute", return_tensors="pt") # Batch size 1
|
||||
outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
>>> model = T5Model.from_pretrained('t5-small')
|
||||
|
||||
>>> input_ids = tokenizer.encode("Hello, my dog is cute", return_tensors="pt") # Batch size 1
|
||||
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
|
||||
|
||||
>>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||
|
||||
@@ -1068,18 +1071,18 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
||||
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
|
||||
|
||||
tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
model = T5ForConditionalGeneration.from_pretrained('t5-small')
|
||||
input_ids = tokenizer.encode("Hello, my dog is cute", return_tensors="pt") # Batch size 1
|
||||
outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
|
||||
loss, prediction_scores = outputs[:2]
|
||||
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
>>> model = T5ForConditionalGeneration.from_pretrained('t5-small')
|
||||
>>> input_ids = tokenizer.encode("Hello, my dog is cute", return_tensors="pt") # Batch size 1
|
||||
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
|
||||
>>> loss, prediction_scores = outputs[:2]
|
||||
|
||||
tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
model = T5ForConditionalGeneration.from_pretrained('t5-small')
|
||||
input_ids = tokenizer.encode("summarize: Hello, my dog is cute", return_tensors="pt") # Batch size 1
|
||||
outputs = model.generate(input_ids)
|
||||
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
>>> model = T5ForConditionalGeneration.from_pretrained('t5-small')
|
||||
>>> input_ids = tokenizer.encode("summarize: Hello, my dog is cute", return_tensors="pt") # Batch size 1
|
||||
>>> outputs = model.generate(input_ids)
|
||||
"""
|
||||
|
||||
if "lm_labels" in kwargs:
|
||||
|
||||
@@ -21,7 +21,12 @@ import logging
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_albert import AlbertConfig
|
||||
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
MULTIPLE_CHOICE_DUMMY_INPUTS,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_tf_bert import ACT2FN, TFBertSelfAttention
|
||||
from .modeling_tf_utils import (
|
||||
TFMultipleChoiceLoss,
|
||||
@@ -39,6 +44,8 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "AlbertTokenizer"
|
||||
|
||||
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"albert-base-v1",
|
||||
"albert-large-v1",
|
||||
@@ -713,6 +720,7 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
|
||||
self.albert = TFAlbertMainLayer(config, name="albert")
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Returns:
|
||||
@@ -737,18 +745,6 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import AlbertTokenizer, TFAlbertModel
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = TFAlbertModel.from_pretrained('albert-base-v2')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
outputs = self.albert(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -837,6 +833,7 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel):
|
||||
return self.albert.embeddings
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Returns:
|
||||
@@ -854,18 +851,6 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import AlbertTokenizer, TFAlbertForMaskedLM
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = TFAlbertForMaskedLM.from_pretrained('albert-base-v2')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores = outputs[0]
|
||||
|
||||
"""
|
||||
outputs = self.albert(inputs, **kwargs)
|
||||
|
||||
@@ -895,6 +880,7 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -930,19 +916,6 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import AlbertTokenizer, TFAlbertForSequenceClassification
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = TFAlbertForSequenceClassification.from_pretrained('albert-base-v2')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
@@ -994,6 +967,7 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1027,19 +1001,6 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import AlbertTokenizer, TFAlbertForTokenClassification
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = TFAlbertForTokenClassification.from_pretrained('albert-base-v2')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
@@ -1089,6 +1050,7 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1130,24 +1092,6 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
# The checkpoint albert-base-v2 is not fine-tuned for question answering. Please see the
|
||||
# examples/question-answering/run_squad.py example to see how to fine-tune a model to a question answering task.
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import AlbertTokenizer, TFAlbertForQuestionAnswering
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = TFAlbertForQuestionAnswering.from_pretrained('albert-base-v2')
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='tf')
|
||||
start_scores, end_scores = model(input_dict)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
start_positions = inputs[8] if len(inputs) > 8 else start_positions
|
||||
@@ -1213,6 +1157,7 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2")
|
||||
def call(
|
||||
self,
|
||||
inputs,
|
||||
@@ -1249,22 +1194,6 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import AlbertTokenizer, TFAlbertForMultipleChoice
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = TFAlbertForMultipleChoice.from_pretrained('albert-base-v2')
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
|
||||
input_ids = tokenizer(choices, add_special_tokens=True, return_tensors='tf', truncation=True, padding=True)[None, :] # Batch size 1, 2 choices
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1))
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
|
||||
@@ -22,7 +22,12 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_bert import BertConfig
|
||||
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
MULTIPLE_CHOICE_DUMMY_INPUTS,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_tf_utils import (
|
||||
TFMultipleChoiceLoss,
|
||||
TFPreTrainedModel,
|
||||
@@ -39,6 +44,7 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "BertTokenizer"
|
||||
|
||||
TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"bert-base-uncased",
|
||||
@@ -704,6 +710,7 @@ class TFBertModel(TFBertPreTrainedModel):
|
||||
self.bert = TFBertMainLayer(config, name="bert")
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Returns:
|
||||
@@ -728,18 +735,6 @@ class TFBertModel(TFBertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import BertTokenizer, TFBertModel
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = TFBertModel.from_pretrained('bert-base-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
outputs = self.bert(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -819,6 +814,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
|
||||
return self.bert.embeddings
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -836,18 +832,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import BertTokenizer, TFBertForMaskedLM
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = TFBertForMaskedLM.from_pretrained('bert-base-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores = outputs[0]
|
||||
|
||||
"""
|
||||
outputs = self.bert(inputs, **kwargs)
|
||||
|
||||
@@ -930,6 +914,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -965,19 +950,6 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import BertTokenizer, TFBertForSequenceClassification
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
@@ -1037,6 +1009,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased")
|
||||
def call(
|
||||
self,
|
||||
inputs,
|
||||
@@ -1073,22 +1046,6 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import BertTokenizer, TFBertForMultipleChoice
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = TFBertForMultipleChoice.from_pretrained('bert-base-uncased')
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
|
||||
input_ids = tf.constant([tokenizer.encode(s, add_special_tokens=True) for s in choices])[None, :] # Batch size 1, 2 choices
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1))
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
@@ -1177,6 +1134,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1210,19 +1168,6 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import BertTokenizer, TFBertForTokenClassification
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = TFBertForTokenClassification.from_pretrained('bert-base-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
@@ -1273,6 +1218,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1314,22 +1260,6 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import BertTokenizer, TFBertForQuestionAnswering
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = TFBertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='tf')
|
||||
start_scores, end_scores = model(input_dict)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
assert answer == "a nice puppet"
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
start_positions = inputs[8] if len(inputs) > 8 else start_positions
|
||||
|
||||
@@ -22,7 +22,7 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_ctrl import CTRLConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_tf_utils import (
|
||||
TFPreTrainedModel,
|
||||
TFSharedEmbeddings,
|
||||
@@ -35,6 +35,8 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "CtrlTokenizer"
|
||||
|
||||
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"ctrl"
|
||||
# See all CTRL models at https://huggingface.co/models?filter=ctrl
|
||||
@@ -489,6 +491,7 @@ class TFCTRLModel(TFCTRLPreTrainedModel):
|
||||
self.transformer = TFCTRLMainLayer(config, name="transformer")
|
||||
|
||||
@add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="ctrl")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -510,18 +513,6 @@ class TFCTRLModel(TFCTRLPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import CTRLTokenizer, TFCTRLModel
|
||||
|
||||
tokenizer = CTRLTokenizer.from_pretrained('ctrl')
|
||||
model = TFCTRLModel.from_pretrained('ctrl')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
outputs = self.transformer(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -569,6 +560,7 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel):
|
||||
return {"inputs": inputs, "past": past, "use_cache": kwargs["use_cache"]}
|
||||
|
||||
@add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="ctrl")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -590,19 +582,6 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import CTRLTokenizer, TFCTRLLMHeadModel
|
||||
|
||||
tokenizer = CTRLTokenizer.from_pretrained('ctrl')
|
||||
model = TFCTRLLMHeadModel.from_pretrained('ctrl')
|
||||
|
||||
input_ids = tf.constant([tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)])
|
||||
outputs = model(input_ids)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(inputs, **kwargs)
|
||||
hidden_states = transformer_outputs[0]
|
||||
|
||||
@@ -23,7 +23,12 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_distilbert import DistilBertConfig
|
||||
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
MULTIPLE_CHOICE_DUMMY_INPUTS,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_tf_utils import (
|
||||
TFMultipleChoiceLoss,
|
||||
TFPreTrainedModel,
|
||||
@@ -41,6 +46,7 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "DistilBertTokenizer"
|
||||
|
||||
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"distilbert-base-uncased",
|
||||
@@ -575,6 +581,7 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel):
|
||||
self.distilbert = TFDistilBertMainLayer(config, name="distilbert") # Embeddings
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Returns:
|
||||
@@ -592,17 +599,6 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import DistilBertTokenizer, TFDistilBertModel
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = TFDistilBertModel.from_pretrained('distilbert-base-cased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
outputs = self.distilbert(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -647,6 +643,7 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel):
|
||||
return self.vocab_projector.input_embeddings
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
|
||||
@@ -665,18 +662,6 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import DistilBertTokenizer, TFDistilBertForMaskedLM
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = TFDistilBertForMaskedLM.from_pretrained('distilbert-base-cased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores = outputs[0]
|
||||
|
||||
"""
|
||||
distilbert_output = self.distilbert(inputs, **kwargs)
|
||||
|
||||
@@ -713,6 +698,7 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque
|
||||
self.dropout = tf.keras.layers.Dropout(config.seq_classif_dropout)
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -746,19 +732,6 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-cased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[6] if len(inputs) > 6 else labels
|
||||
@@ -809,6 +782,7 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -840,19 +814,6 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import DistilBertTokenizer, TFDistilBertForTokenClassification
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = TFDistilBertForTokenClassification.from_pretrained('distilbert-base-cased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[6] if len(inputs) > 6 else labels
|
||||
@@ -916,6 +877,7 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def call(
|
||||
self,
|
||||
inputs,
|
||||
@@ -950,22 +912,6 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import DistilBertTokenizer, TFDistilBertForMultipleChoice
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
|
||||
model = TFDistilBertForMultipleChoice.from_pretrained('distilbert-base-uncased')
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
|
||||
input_ids = tf.constant([tokenizer.encode(s, add_special_tokens=True) for s in choices])[None, :] # Batch size 1, 2 choices
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1))
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
@@ -1046,6 +992,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
|
||||
self.dropout = tf.keras.layers.Dropout(config.qa_dropout)
|
||||
|
||||
@add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1085,21 +1032,6 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import DistilBertTokenizer, TFDistilBertForQuestionAnswering
|
||||
|
||||
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
|
||||
model = TFDistilBertForQuestionAnswering.from_pretrained('distilbert-base-cased')
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='tf')
|
||||
start_scores, end_scores = model(input_dict)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
start_positions = inputs[6] if len(inputs) > 6 else start_positions
|
||||
|
||||
@@ -4,7 +4,7 @@ import tensorflow as tf
|
||||
|
||||
from transformers import ElectraConfig
|
||||
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_tf_bert import ACT2FN, TFBertEncoder, TFBertPreTrainedModel
|
||||
from .modeling_tf_utils import (
|
||||
TFQuestionAnsweringLoss,
|
||||
@@ -18,6 +18,7 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "ElectraTokenizer"
|
||||
|
||||
TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"google/electra-small-generator",
|
||||
@@ -383,6 +384,7 @@ class TFElectraModel(TFElectraPreTrainedModel):
|
||||
self.electra = TFElectraMainLayer(config, name="electra")
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Returns:
|
||||
@@ -400,17 +402,6 @@ class TFElectraModel(TFElectraPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import ElectraTokenizer, TFElectraModel
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
|
||||
model = TFElectraModel.from_pretrained('google/electra-small-discriminator')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
outputs = self.electra(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -532,6 +523,7 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel):
|
||||
return self.generator_lm_head
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-generator")
|
||||
def call(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -560,18 +552,6 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import ElectraTokenizer, TFElectraForMaskedLM
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-generator')
|
||||
model = TFElectraForMaskedLM.from_pretrained('google/electra-small-generator')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores = outputs[0]
|
||||
|
||||
"""
|
||||
|
||||
generator_hidden_states = self.electra(
|
||||
@@ -611,6 +591,7 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -644,19 +625,6 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import ElectraTokenizer, TFElectraForTokenClassification
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
|
||||
model = TFElectraForTokenClassification.from_pretrained('google/electra-small-discriminator')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
@@ -705,6 +673,7 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -746,22 +715,6 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import ElectraTokenizer, TFElectraForQuestionAnswering
|
||||
|
||||
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-generator')
|
||||
model = TFElectraForQuestionAnswering.from_pretrained('google/electra-small-generator')
|
||||
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='tf')
|
||||
start_scores, end_scores = model(input_dict)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
start_positions = inputs[8] if len(inputs) > 8 else start_positions
|
||||
|
||||
@@ -22,7 +22,7 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_gpt2 import GPT2Config
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_tf_utils import (
|
||||
TFConv1D,
|
||||
TFPreTrainedModel,
|
||||
@@ -38,6 +38,8 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "GPT2Tokenizer"
|
||||
|
||||
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"gpt2",
|
||||
"gpt2-medium",
|
||||
@@ -490,6 +492,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
|
||||
self.transformer = TFGPT2MainLayer(config, name="transformer")
|
||||
|
||||
@add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="gpt2")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -511,18 +514,6 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import GPT2Tokenizer, TFGPT2Model
|
||||
|
||||
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
||||
model = TFGPT2Model.from_pretrained('gpt2')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
outputs = self.transformer(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -549,6 +540,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel):
|
||||
return {"inputs": inputs, "past": past, "use_cache": kwargs["use_cache"]}
|
||||
|
||||
@add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="gpt2")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -570,19 +562,6 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
|
||||
|
||||
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
||||
model = TFGPT2LMHeadModel.from_pretrained('gpt2')
|
||||
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
logits = outputs[0]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(inputs, **kwargs)
|
||||
hidden_states = transformer_outputs[0]
|
||||
@@ -659,29 +638,26 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
# For example purposes. Not runnable.
|
||||
import tensorflow as tf
|
||||
from transformers import GPT2Tokenizer, TFGPT2DoubleHeadsModel
|
||||
>>> import tensorflow as tf
|
||||
>>> from transformers import GPT2Tokenizer, TFGPT2DoubleHeadsModel
|
||||
|
||||
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
||||
model = TFGPT2DoubleHeadsModel.from_pretrained('gpt2')
|
||||
>>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
||||
>>> model = TFGPT2DoubleHeadsModel.from_pretrained('gpt2')
|
||||
|
||||
# Add a [CLS] to the vocabulary (we should train it also!)
|
||||
# This option is currently not implemented in TF 2.0
|
||||
raise NotImplementedError
|
||||
tokenizer.add_special_tokens({'cls_token': '[CLS]'})
|
||||
model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
|
||||
print(tokenizer.cls_token_id, len(tokenizer)) # The newly token the last token of the vocabulary
|
||||
>>> # Add a [CLS] to the vocabulary (we should train it also!)
|
||||
>>> num_added_tokens = tokenizer.add_special_tokens({'cls_token': '[CLS]'})
|
||||
|
||||
choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
||||
encoded_choices = [tokenizer.encode(s) for s in choices]
|
||||
cls_token_location = [tokens.index(tokenizer.cls_token_id) for tokens in encoded_choices]
|
||||
>>> embedding_layer = model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
|
||||
|
||||
input_ids = tf.constant(encoded_choices)[None, :] # Batch size: 1, number of choices: 2
|
||||
mc_token_ids = tf.constant([cls_token_location]) # Batch size: 1
|
||||
>>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
||||
>>> encoded_choices = [tokenizer.encode(s) for s in choices]
|
||||
>>> cls_token_location = [tokens.index(tokenizer.cls_token_id) for tokens in encoded_choices]
|
||||
|
||||
outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||
lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
||||
>>> input_ids = tf.constant(encoded_choices)[None, :] # Batch size: 1, number of choices: 2
|
||||
>>> mc_token_ids = tf.constant([cls_token_location]) # Batch size: 1
|
||||
|
||||
>>> outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||
>>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
|
||||
@@ -21,7 +21,12 @@ import logging
|
||||
import tensorflow as tf
|
||||
|
||||
from . import MobileBertConfig
|
||||
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
MULTIPLE_CHOICE_DUMMY_INPUTS,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_tf_bert import TFBertIntermediate, gelu, gelu_new, swish
|
||||
from .modeling_tf_utils import (
|
||||
TFMultipleChoiceLoss,
|
||||
@@ -39,6 +44,7 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
|
||||
|
||||
TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"mobilebert-uncased",
|
||||
@@ -621,19 +627,6 @@ class TFMobileBertMLMHead(tf.keras.layers.Layer):
|
||||
return prediction_scores
|
||||
|
||||
|
||||
class TFMobileBertPreTrainingHeads(tf.keras.layers.Layer):
|
||||
def __init__(self, config, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.predictions = TFMobileBertLMPredictionHead(config, name="predictions")
|
||||
self.seq_relationship = tf.keras.layers.Dense(2, name="seq_relationship")
|
||||
|
||||
def call(self, inputs):
|
||||
sequence_output, pooled_output = inputs
|
||||
prediction_scores = self.predictions(sequence_output)
|
||||
seq_relationship_score = self.seq_relationship(pooled_output)
|
||||
return prediction_scores, seq_relationship_score
|
||||
|
||||
|
||||
@keras_serializable
|
||||
class TFMobileBertMainLayer(tf.keras.layers.Layer):
|
||||
config_class = MobileBertConfig
|
||||
@@ -845,6 +838,7 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel):
|
||||
self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Returns:
|
||||
@@ -869,18 +863,6 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import MobileBertTokenizer, TFMobileBertModel
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = TFMobileBertModel.from_pretrained('mobilebert-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
"""
|
||||
outputs = self.mobilebert(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -895,7 +877,8 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel):
|
||||
def __init__(self, config, *inputs, **kwargs):
|
||||
super().__init__(config, *inputs, **kwargs)
|
||||
self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")
|
||||
self.cls = TFMobileBertPreTrainingHeads(config, name="cls")
|
||||
self.predictions = TFMobileBertMLMHead(config, name="predictions___cls")
|
||||
self.seq_relationship = TFMobileBertOnlyNSPHead(2, name="seq_relationship___cls")
|
||||
|
||||
def get_output_embeddings(self):
|
||||
return self.mobilebert.embeddings
|
||||
@@ -923,20 +906,21 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import MobileBertTokenizer, TFMobileBertForPreTraining
|
||||
>>> import tensorflow as tf
|
||||
>>> from transformers import MobileBertTokenizer, TFMobileBertForPreTraining
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = TFMobileBertForPreTraining.from_pretrained('mobilebert-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores, seq_relationship_scores = outputs[:2]
|
||||
>>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
|
||||
>>> model = TFMobileBertForPreTraining.from_pretrained('google/mobilebert-uncased')
|
||||
>>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
>>> outputs = model(input_ids)
|
||||
>>> prediction_scores, seq_relationship_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
outputs = self.mobilebert(inputs, **kwargs)
|
||||
|
||||
sequence_output, pooled_output = outputs[:2]
|
||||
prediction_scores, seq_relationship_score = self.cls([sequence_output, pooled_output])
|
||||
prediction_scores = self.predictions(sequence_output)
|
||||
seq_relationship_score = self.seq_relationship(pooled_output)
|
||||
outputs = (prediction_scores, seq_relationship_score,) + outputs[
|
||||
2:
|
||||
] # add hidden states and attention if they are here
|
||||
@@ -956,6 +940,7 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel):
|
||||
return self.mobilebert.embeddings
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -973,18 +958,6 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import MobileBertTokenizer, TFMobileBertForMaskedLM
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = TFMobileBertForMaskedLM.from_pretrained('mobilebert-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores = outputs[0]
|
||||
|
||||
"""
|
||||
outputs = self.mobilebert(inputs, **kwargs)
|
||||
|
||||
@@ -1015,7 +988,7 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel):
|
||||
super().__init__(config, *inputs, **kwargs)
|
||||
|
||||
self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")
|
||||
self.cls = TFMobileBertOnlyNSPHead(config, name="cls")
|
||||
self.cls = TFMobileBertOnlyNSPHead(config, name="seq_relationship___cls")
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
def call(self, inputs, **kwargs):
|
||||
@@ -1038,18 +1011,17 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import MobileBertTokenizer, TFMobileBertForNextSentencePrediction
|
||||
>>> import tensorflow as tf
|
||||
>>> from transformers import MobileBertTokenizer, TFMobileBertForNextSentencePrediction
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = TFMobileBertForNextSentencePrediction.from_pretrained('mobilebert-uncased')
|
||||
>>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
|
||||
>>> model = TFMobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased')
|
||||
|
||||
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
next_sentence = "The sky is blue due to the shorter wavelength of blue light."
|
||||
encoding = tokenizer.encode_plus(prompt, next_sentence, return_tensors='tf')
|
||||
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
|
||||
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
|
||||
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='tf')
|
||||
|
||||
logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]
|
||||
assert logits[0][0] < logits[0][1] # the next sentence was random
|
||||
>>> logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]
|
||||
"""
|
||||
outputs = self.mobilebert(inputs, **kwargs)
|
||||
|
||||
@@ -1078,6 +1050,7 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1113,19 +1086,6 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import MobileBertTokenizer, TFBMobileBertForSequenceClassification
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = TFMobileBertForSequenceClassification.from_pretrained('mobilebert-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
@@ -1176,6 +1136,7 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1217,22 +1178,6 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import MobileBertTokenizer, TFMobileBertForQuestionAnswering
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = TFMobileBertForQuestionAnswering.from_pretrained('mobilebert-uncased') # Not a fine-tuned model! Load a fine-tuned model to obtain coherent results.
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='tf')
|
||||
start_scores, end_scores = model(input_dict)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
assert answer == "a nice puppet"
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
start_positions = inputs[8] if len(inputs) > 8 else start_positions
|
||||
@@ -1298,6 +1243,7 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def call(
|
||||
self,
|
||||
inputs,
|
||||
@@ -1334,22 +1280,6 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import MobileBertTokenizer, TFMobileBertForMultipleChoice
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = TFMobileBertForMultipleChoice.from_pretrained('mobilebert-uncased')
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
|
||||
input_ids = tf.constant([tokenizer.encode(s, add_special_tokens=True) for s in choices])[None, :] # Batch size 1, 2 choices
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1))
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
@@ -1438,6 +1368,7 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1471,19 +1402,6 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import MobileBertTokenizer, TFMobileBertForTokenClassification
|
||||
|
||||
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
|
||||
model = TFMobileBertForTokenClassification.from_pretrained('mobilebert-uncased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
|
||||
@@ -22,7 +22,7 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_openai import OpenAIGPTConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_tf_utils import (
|
||||
TFConv1D,
|
||||
TFPreTrainedModel,
|
||||
@@ -38,6 +38,8 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "OpenAIGPTTokenizer"
|
||||
|
||||
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"openai-gpt",
|
||||
# See all OpenAI GPT models at https://huggingface.co/models?filter=openai-gpt
|
||||
@@ -449,6 +451,7 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
|
||||
self.transformer = TFOpenAIGPTMainLayer(config, name="transformer")
|
||||
|
||||
@add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -466,18 +469,6 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import OpenAIGPTTokenizer, TFOpenAIGPTModel
|
||||
|
||||
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
|
||||
model = TFOpenAIGPTModel.from_pretrained('openai-gpt')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
outputs = self.transformer(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -497,6 +488,7 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel):
|
||||
return self.transformer.tokens_embed
|
||||
|
||||
@add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -514,18 +506,6 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import OpenAIGPTTokenizer, TFOpenAIGPTLMHeadModel
|
||||
|
||||
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
|
||||
model = TFOpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
logits = outputs[0]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(inputs, **kwargs)
|
||||
hidden_states = transformer_outputs[0]
|
||||
@@ -601,26 +581,23 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
# For example purposes. Not runnable.
|
||||
import tensorflow as tf
|
||||
from transformers import OpenAIGPTTokenizer, TFOpenAIGPTDoubleHeadsModel
|
||||
>>> import tensorflow as tf
|
||||
>>> from transformers import OpenAIGPTTokenizer, TFOpenAIGPTDoubleHeadsModel
|
||||
|
||||
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
|
||||
model = TFOpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
|
||||
>>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
|
||||
>>> model = TFOpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
|
||||
|
||||
# Add a [CLS] to the vocabulary (we should train it also!)
|
||||
# This option is currently not implemented in TF 2.0
|
||||
raise NotImplementedError
|
||||
tokenizer.add_special_tokens({'cls_token': '[CLS]'})
|
||||
model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
|
||||
print(tokenizer.cls_token_id, len(tokenizer)) # The newly token the last token of the vocabulary
|
||||
|
||||
choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
||||
input_ids = tf.constant([tokenizer.encode(s) for s in choices])[None, :] # Batch size 1, 2 choices
|
||||
mc_token_ids = tf.constant([input_ids.size(-1), input_ids.size(-1)])[None, :] # Batch size 1
|
||||
outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||
lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
||||
>>> # Add a [CLS] to the vocabulary (we should train it also!)
|
||||
>>> tokenizer.add_special_tokens({'cls_token': '[CLS]'})
|
||||
>>> model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
|
||||
>>> print(tokenizer.cls_token_id, len(tokenizer)) # The newly added token is the last token of the vocabulary
|
||||
|
||||
>>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
||||
>>> encoding = tokenizer(choices, return_tensors="tf")
|
||||
>>> inputs = {k: tf.expand_dims(v, 0) for k, v in encoding.items()}
|
||||
>>> inputs["mc_token_ids"]= tf.constant([inputs["input_ids"].shape[-1] - 1, inputs["input_ids"].shape[-1] - 1])[None, :] # Batch size 1
|
||||
>>> outputs = model(inputs)
|
||||
>>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
||||
"""
|
||||
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
@@ -633,7 +610,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
|
||||
mc_token_ids = inputs[6] if len(inputs) > 6 else mc_token_ids
|
||||
output_attentions = inputs[7] if len(inputs) > 7 else output_attentions
|
||||
assert len(inputs) <= 8, "Too many inputs."
|
||||
elif isinstance(inputs, dict):
|
||||
elif isinstance(inputs, (dict, BatchEncoding)):
|
||||
input_ids = inputs.get("input_ids")
|
||||
attention_mask = inputs.get("attention_mask", attention_mask)
|
||||
token_type_ids = inputs.get("token_type_ids", token_type_ids)
|
||||
|
||||
@@ -21,7 +21,12 @@ import logging
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_roberta import RobertaConfig
|
||||
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
MULTIPLE_CHOICE_DUMMY_INPUTS,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_tf_bert import TFBertEmbeddings, TFBertMainLayer, gelu
|
||||
from .modeling_tf_utils import (
|
||||
TFMultipleChoiceLoss,
|
||||
@@ -38,6 +43,8 @@ from .tokenization_utils_base import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "RobertaTokenizer"
|
||||
|
||||
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"roberta-base",
|
||||
"roberta-large",
|
||||
@@ -195,6 +202,7 @@ class TFRobertaModel(TFRobertaPreTrainedModel):
|
||||
self.roberta = TFRobertaMainLayer(config, name="roberta")
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Returns:
|
||||
@@ -219,18 +227,6 @@ class TFRobertaModel(TFRobertaPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import RobertaTokenizer, TFRobertaModel
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = TFRobertaModel.from_pretrained('roberta-base')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
outputs = self.roberta(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -279,6 +275,7 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel):
|
||||
return self.lm_head.decoder
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -296,18 +293,6 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import RobertaTokenizer, TFRobertaForMaskedLM
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = TFRobertaForMaskedLM.from_pretrained('roberta-base')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores = outputs[0]
|
||||
|
||||
"""
|
||||
outputs = self.roberta(inputs, **kwargs)
|
||||
|
||||
@@ -358,6 +343,7 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla
|
||||
self.classifier = TFRobertaClassificationHead(config, name="classifier")
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -387,19 +373,6 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import RobertaTokenizer, TFRobertaForSequenceClassification
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
@@ -441,7 +414,7 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss)
|
||||
def __init__(self, config, *inputs, **kwargs):
|
||||
super().__init__(config, *inputs, **kwargs)
|
||||
|
||||
self.roberta = TFBertMainLayer(config, name="roberta")
|
||||
self.roberta = TFRobertaMainLayer(config, name="roberta")
|
||||
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
||||
self.classifier = tf.keras.layers.Dense(
|
||||
1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
|
||||
@@ -457,6 +430,7 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss)
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def call(
|
||||
self,
|
||||
inputs,
|
||||
@@ -493,22 +467,6 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss)
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import RobertaTokenizer, TFRobertaForMultipleChoice
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = TFRobertaForMultipleChoice.from_pretrained('roberta-base')
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
|
||||
input_ids = tf.constant([tokenizer.encode(s, add_special_tokens=True) for s in choices])[None, :] # Batch size 1, 2 choices
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1))
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
@@ -592,6 +550,7 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -625,19 +584,6 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import RobertaTokenizer, TFRobertaForTokenClassification
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = TFRobertaForTokenClassification.from_pretrained('roberta-base')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[8] if len(inputs) > 8 else labels
|
||||
@@ -687,6 +633,7 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -728,24 +675,6 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
# The checkpoint roberta-base is not fine-tuned for question answering. Please see the
|
||||
# examples/question-answering/run_squad.py example to see how to fine-tune a model to a question answering task.
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import RobertaTokenizer, TFRobertaForQuestionAnswering
|
||||
|
||||
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
|
||||
model = TFRobertaForQuestionAnswering.from_pretrained('roberta-base')
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='tf')
|
||||
start_scores, end_scores = model(input_dict)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
start_positions = inputs[8] if len(inputs) > 8 else start_positions
|
||||
|
||||
@@ -37,6 +37,8 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "T5Tokenizer"
|
||||
|
||||
TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"t5-small",
|
||||
"t5-base",
|
||||
@@ -931,13 +933,13 @@ class TFT5Model(TFT5PreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import T5Tokenizer, TFT5Model
|
||||
>>> from transformers import T5Tokenizer, TFT5Model
|
||||
|
||||
tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
model = TFT5Model.from_pretrained('t5-small')
|
||||
inputs = tokenizer.encode("Hello, my dog is cute", return_tensors="tf") # Batch size 1
|
||||
outputs = model(inputs, decoder_input_ids=inputs)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
>>> model = TFT5Model.from_pretrained('t5-small')
|
||||
>>> inputs = tokenizer.encode("Hello, my dog is cute", return_tensors="tf") # Batch size 1
|
||||
>>> outputs = model(inputs, decoder_input_ids=inputs)
|
||||
>>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
|
||||
@@ -1074,18 +1076,18 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import T5Tokenizer, TFT5ForConditionalGeneration
|
||||
>>> from transformers import T5Tokenizer, TFT5ForConditionalGeneration
|
||||
|
||||
tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
model = TFT5ForConditionalGeneration.from_pretrained('t5-small')
|
||||
inputs = tokenizer.encode("Hello, my dog is cute", return_tensors="tf") # Batch size 1
|
||||
outputs = model(inputs, decoder_input_ids=inputs)
|
||||
prediction_scores = outputs[0]
|
||||
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
>>> model = TFT5ForConditionalGeneration.from_pretrained('t5-small')
|
||||
>>> inputs = tokenizer.encode("Hello, my dog is cute", return_tensors="tf") # Batch size 1
|
||||
>>> outputs = model(inputs, decoder_input_ids=inputs)
|
||||
>>> prediction_scores = outputs[0]
|
||||
|
||||
tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
model = TFT5ForConditionalGeneration.from_pretrained('t5-small')
|
||||
inputs = tokenizer.encode("summarize: Hello, my dog is cute", return_tensors="tf") # Batch size 1
|
||||
model.generate(inputs)
|
||||
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
||||
>>> model = TFT5ForConditionalGeneration.from_pretrained('t5-small')
|
||||
>>> inputs = tokenizer.encode("summarize: Hello, my dog is cute", return_tensors="tf") # Batch size 1
|
||||
>>> result = model.generate(inputs)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ import logging
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_transfo_xl import TransfoXLConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_tf_transfo_xl_utilities import TFAdaptiveSoftmaxMask
|
||||
from .modeling_tf_utils import (
|
||||
TFPreTrainedModel,
|
||||
@@ -36,6 +36,8 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "TransfoXLTokenizer"
|
||||
|
||||
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"transfo-xl-wt103",
|
||||
# See all Transformer XL models at https://huggingface.co/models?filter=transfo-xl
|
||||
@@ -722,6 +724,7 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
|
||||
self.transformer = TFTransfoXLMainLayer(config, name="transformer")
|
||||
|
||||
@add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="transfo-xl-wt103")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -743,18 +746,6 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import TransfoXLTokenizer, TFTransfoXLModel
|
||||
|
||||
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
|
||||
model = TFTransfoXLModel.from_pretrained('transfo-xl-wt103')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states, mems = outputs[:2]
|
||||
|
||||
"""
|
||||
outputs = self.transformer(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -811,6 +802,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
|
||||
return self.transformer.init_mems(bsz)
|
||||
|
||||
@add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="transfo-xl-wt103")
|
||||
def call(
|
||||
self,
|
||||
inputs,
|
||||
@@ -842,18 +834,6 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import TransfoXLTokenizer, TFTransfoXLLMHeadModel
|
||||
|
||||
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
|
||||
model = TFTransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores, mems = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
@@ -863,7 +843,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
|
||||
labels = inputs[4] if len(inputs) > 4 else labels
|
||||
output_attentions = inputs[5] if len(inputs) > 5 else output_attentions
|
||||
assert len(inputs) <= 6, "Too many inputs."
|
||||
elif isinstance(inputs, dict):
|
||||
elif isinstance(inputs, (BatchEncoding, dict)):
|
||||
input_ids = inputs.get("input_ids")
|
||||
mems = inputs.get("mems", mems)
|
||||
head_mask = inputs.get("head_mask", head_mask)
|
||||
|
||||
@@ -24,7 +24,12 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_xlm import XLMConfig
|
||||
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
MULTIPLE_CHOICE_DUMMY_INPUTS,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_tf_utils import (
|
||||
TFMultipleChoiceLoss,
|
||||
TFPreTrainedModel,
|
||||
@@ -43,6 +48,8 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "XLMTokenizer"
|
||||
|
||||
TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"xlm-mlm-en-2048",
|
||||
"xlm-mlm-ende-1024",
|
||||
@@ -608,6 +615,7 @@ class TFXLMModel(TFXLMPreTrainedModel):
|
||||
self.transformer = TFXLMMainLayer(config, name="transformer")
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -625,18 +633,6 @@ class TFXLMModel(TFXLMPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLMTokenizer, TFXLMModel
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = TFXLMModel.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
outputs = self.transformer(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -704,6 +700,7 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
|
||||
return {"inputs": inputs, "langs": langs}
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -721,18 +718,6 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLMTokenizer, TFXLMWithLMHeadModel
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = TFXLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(inputs, **kwargs)
|
||||
|
||||
@@ -757,6 +742,7 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat
|
||||
self.sequence_summary = TFSequenceSummary(config, initializer_range=config.init_std, name="sequence_summary")
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -795,19 +781,6 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLMTokenizer, TFXLMForSequenceClassification
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = TFXLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[11] if len(inputs) > 11 else labels
|
||||
@@ -865,6 +838,7 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def call(
|
||||
self,
|
||||
inputs,
|
||||
@@ -876,9 +850,9 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
|
||||
cache=None,
|
||||
head_mask=None,
|
||||
inputs_embeds=None,
|
||||
labels=None,
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
labels=None,
|
||||
training=False,
|
||||
):
|
||||
r"""
|
||||
@@ -904,22 +878,6 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLMTokenizer, TFXLMForMultipleChoice
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = TFXLMForMultipleChoice.from_pretrained('xlm-mlm-en-2048')
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
|
||||
input_ids = tf.constant([tokenizer.encode(s, add_special_tokens=True) for s in choices])[None, :] # Batch size 1, 2 choices
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1))
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
@@ -932,7 +890,9 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
|
||||
head_mask = inputs[7] if len(inputs) > 7 else head_mask
|
||||
inputs_embeds = inputs[8] if len(inputs) > 8 else inputs_embeds
|
||||
output_attentions = inputs[9] if len(inputs) > 9 else output_attentions
|
||||
assert len(inputs) <= 10, "Too many inputs."
|
||||
output_hidden_states = inputs[10] if len(inputs) > 10 else output_hidden_states
|
||||
labels = inputs[11] if len(inputs) > 11 else labels
|
||||
assert len(inputs) <= 11, "Too many inputs."
|
||||
elif isinstance(inputs, (dict, BatchEncoding)):
|
||||
input_ids = inputs.get("input_ids")
|
||||
attention_mask = inputs.get("attention_mask", attention_mask)
|
||||
@@ -944,7 +904,9 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
|
||||
head_mask = inputs.get("head_mask", head_mask)
|
||||
inputs_embeds = inputs.get("inputs_embeds", inputs_embeds)
|
||||
output_attentions = inputs.get("output_attentions", output_attentions)
|
||||
assert len(inputs) <= 10, "Too many inputs."
|
||||
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
||||
labels = inputs.get("labels", labels)
|
||||
assert len(inputs) <= 12, "Too many inputs."
|
||||
else:
|
||||
input_ids = inputs
|
||||
|
||||
@@ -1001,13 +963,14 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos
|
||||
self.transformer = TFXLMMainLayer(config, name="transformer")
|
||||
self.dropout = tf.keras.layers.Dropout(config.dropout)
|
||||
self.classifier = tf.keras.layers.Dense(
|
||||
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
|
||||
config.num_labels, kernel_initializer=get_initializer(config.init_std), name="classifier"
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def call(
|
||||
self,
|
||||
input_ids=None,
|
||||
inputs=None,
|
||||
attention_mask=None,
|
||||
langs=None,
|
||||
token_type_ids=None,
|
||||
@@ -1016,9 +979,9 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos
|
||||
cache=None,
|
||||
head_mask=None,
|
||||
inputs_embeds=None,
|
||||
labels=None,
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
labels=None,
|
||||
training=False,
|
||||
):
|
||||
r"""
|
||||
@@ -1041,25 +1004,22 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLMTokenizer, TFXLMForTokenClassification
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = TFXLMForTokenClassification.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[11] if len(inputs) > 11 else labels
|
||||
if len(inputs) > 11:
|
||||
inputs = inputs[:11]
|
||||
elif isinstance(inputs, (dict, BatchEncoding)):
|
||||
labels = inputs.pop("labels", labels)
|
||||
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
inputs,
|
||||
attention_mask=attention_mask,
|
||||
langs=langs,
|
||||
token_type_ids=token_type_ids,
|
||||
position_ids=position_ids,
|
||||
lengths=lengths,
|
||||
cache=cache,
|
||||
head_mask=head_mask,
|
||||
inputs_embeds=inputs_embeds,
|
||||
output_attentions=output_attentions,
|
||||
@@ -1072,7 +1032,7 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos
|
||||
sequence_output = self.dropout(sequence_output, training=training)
|
||||
logits = self.classifier(sequence_output)
|
||||
|
||||
outputs = (logits,) + transformer_outputs[2:] # add hidden states and attention if they are here
|
||||
outputs = (logits,) + transformer_outputs[1:] # add hidden states and attention if they are here
|
||||
|
||||
if labels is not None:
|
||||
loss = self.compute_loss(labels, logits)
|
||||
@@ -1095,6 +1055,7 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1139,21 +1100,6 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLMTokenizer, TFXLMForQuestionAnsweringSimple
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = TFXLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='tf')
|
||||
start_scores, end_scores = model(input_dict)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
start_positions = inputs[11] if len(inputs) > 11 else start_positions
|
||||
|
||||
@@ -23,7 +23,12 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_xlnet import XLNetConfig
|
||||
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import (
|
||||
MULTIPLE_CHOICE_DUMMY_INPUTS,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_callable,
|
||||
)
|
||||
from .modeling_tf_utils import (
|
||||
TFMultipleChoiceLoss,
|
||||
TFPreTrainedModel,
|
||||
@@ -42,6 +47,8 @@ from .tokenization_utils import BatchEncoding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "XLNetTokenizer"
|
||||
|
||||
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"xlnet-base-cased",
|
||||
"xlnet-large-cased",
|
||||
@@ -832,6 +839,7 @@ class TFXLNetModel(TFXLNetPreTrainedModel):
|
||||
self.transformer = TFXLNetMainLayer(config, name="transformer")
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def call(self, inputs, **kwargs):
|
||||
r"""
|
||||
Return:
|
||||
@@ -853,18 +861,6 @@ class TFXLNetModel(TFXLNetPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLNetTokenizer, TFXLNetModel
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
|
||||
model = TFXLNetModel.from_pretrained('xlnet-large-cased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
outputs = self.transformer(inputs, **kwargs)
|
||||
return outputs
|
||||
@@ -949,10 +945,13 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel):
|
||||
|
||||
# We show how to set up inputs to predict a next token using a bi-directional context.
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True))[None, :] # We will predict the masked token
|
||||
|
||||
perm_mask = np.zeros((1, input_ids.shape[1], input_ids.shape[1]))
|
||||
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
|
||||
|
||||
target_mapping = np.zeros((1, 1, input_ids.shape[1])) # Shape [1, 1, seq_length] => let's predict one token
|
||||
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
|
||||
|
||||
outputs = model(input_ids, perm_mask=tf.constant(perm_mask, dtype=tf.float32), target_mapping=tf.constant(target_mapping, dtype=tf.float32))
|
||||
|
||||
next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
|
||||
@@ -986,6 +985,7 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1029,19 +1029,6 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLNetTokenizer, TFXLNetForSequenceClassification
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
|
||||
model = TFXLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[12] if len(inputs) > 12 else labels
|
||||
@@ -1105,6 +1092,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1145,22 +1133,6 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLNetTokenizer, TFXLNetForMultipleChoice
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
|
||||
model = TFXLNetForMultipleChoice.from_pretrained('xlnet-base-cased')
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
|
||||
input_ids = tf.constant([tokenizer.encode(s, add_special_tokens=True) for s in choices])[None, :] # Batch size 1, 2 choices
|
||||
labels = tf.reshape(tf.constant(1), (-1, 1))
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
@@ -1257,6 +1229,8 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio
|
||||
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1298,19 +1272,6 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLNetTokenizer, TFXLNetForTokenClassification
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
|
||||
model = TFXLNetForTokenClassification.from_pretrained('xlnet-large-cased')
|
||||
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
|
||||
labels = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
labels = inputs[12] if len(inputs) > 12 else labels
|
||||
@@ -1361,6 +1322,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def call(
|
||||
self,
|
||||
inputs=None,
|
||||
@@ -1412,21 +1374,6 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import XLNetTokenizer, TFXLNetForQuestionAnsweringSimple
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
|
||||
model = TFXLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased')
|
||||
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
|
||||
input_dict = tokenizer.encode_plus(question, text, return_tensors='tf')
|
||||
start_scores, end_scores = model(input_dict)
|
||||
|
||||
all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
|
||||
answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
start_positions = inputs[12] if len(inputs) > 12 else start_positions
|
||||
|
||||
@@ -27,13 +27,15 @@ import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from .configuration_transfo_xl import TransfoXLConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax
|
||||
from .modeling_utils import PreTrainedModel
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "TransfoXLTokenizer"
|
||||
|
||||
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"transfo-xl-wt103",
|
||||
# See all Transformer XL models at https://huggingface.co/models?filter=transfo-xl
|
||||
@@ -749,6 +751,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
|
||||
return new_mems
|
||||
|
||||
@add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="transfo-xl-wt103")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -778,18 +781,6 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import TransfoXLTokenizer, TransfoXLModel
|
||||
import torch
|
||||
|
||||
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
|
||||
model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states, mems = outputs[:2]
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -945,6 +936,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
|
||||
return self.transformer.init_mems(bsz)
|
||||
|
||||
@add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="transfo-xl-wt103")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -984,18 +976,6 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import TransfoXLTokenizer, TransfoXLLMHeadModel
|
||||
import torch
|
||||
|
||||
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
|
||||
model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
prediction_scores, mems = outputs[:2]
|
||||
|
||||
"""
|
||||
if input_ids is not None:
|
||||
bsz, tgt_len = input_ids.size(0), input_ids.size(1)
|
||||
|
||||
@@ -978,13 +978,15 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained('distilgpt2') # Initialize tokenizer
|
||||
model = AutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache.
|
||||
model = AutoModelForCausalLM.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache.
|
||||
outputs = model.generate(max_length=40) # do greedy decoding
|
||||
print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained('openai-gpt') # Initialize tokenizer
|
||||
model = AutoModelWithLMHead.from_pretrained('openai-gpt') # Download model and configuration from S3 and cache.
|
||||
model = AutoModelForCausalLM.from_pretrained('openai-gpt') # Download model and configuration from S3 and cache.
|
||||
input_context = 'The dog'
|
||||
input_ids = tokenizer.encode(input_context, return_tensors='pt') # encode input context
|
||||
outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3, temperature=1.5) # generate 3 independent sequences using beam search decoding (5 beams) with sampling from initial context 'The dog'
|
||||
@@ -992,22 +994,22 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
|
||||
print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained('distilgpt2') # Initialize tokenizer
|
||||
model = AutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache.
|
||||
model = AutoModelForCausalLM.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache.
|
||||
input_context = 'The dog'
|
||||
input_ids = tokenizer.encode(input_context, return_tensors='pt') # encode input context
|
||||
outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, num_return_sequences=3) # generate 3 sequences by sampling
|
||||
outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, num_return_sequences=3, do_sample=True) # generate 3 sequences by sampling
|
||||
for i in range(3): # 3 output sequences were generated
|
||||
print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained('ctrl') # Initialize tokenizer
|
||||
model = AutoModelWithLMHead.from_pretrained('ctrl') # Download model and configuration from S3 and cache.
|
||||
model = AutoModelForCausalLM.from_pretrained('ctrl') # Download model and configuration from S3 and cache.
|
||||
input_context = 'Legal My neighbor is' # "Legal" is one of the control codes for ctrl
|
||||
input_ids = tokenizer.encode(input_context, return_tensors='pt') # encode input context
|
||||
outputs = model.generate(input_ids=input_ids, max_length=50, temperature=0.7, repetition_penalty=1.2) # generate sequences
|
||||
print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained('gpt2') # Initialize tokenizer
|
||||
model = AutoModelWithLMHead.from_pretrained('gpt2') # Download model and configuration from S3 and cache.
|
||||
model = AutoModelForCausalLM.from_pretrained('gpt2') # Download model and configuration from S3 and cache.
|
||||
input_context = 'My cute dog' # initial context to generate from
|
||||
bad_words_ids = [tokenizer.encode(bad_word, add_prefix_space=True) for bad_word in ['idiot', 'stupid', 'shut up']]
|
||||
input_ids = tokenizer.encode(input_context, return_tensors='pt') # encode input context
|
||||
|
||||
@@ -28,7 +28,7 @@ from torch.nn import functional as F
|
||||
|
||||
from .activations import gelu
|
||||
from .configuration_xlm import XLMConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_utils import (
|
||||
PreTrainedModel,
|
||||
SequenceSummary,
|
||||
@@ -40,6 +40,8 @@ from .modeling_utils import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "XLMTokenizer"
|
||||
|
||||
XLM_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"xlm-mlm-en-2048",
|
||||
"xlm-mlm-ende-1024",
|
||||
@@ -395,6 +397,7 @@ class XLMModel(XLMPreTrainedModel):
|
||||
self.attentions[layer].prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -425,18 +428,6 @@ class XLMModel(XLMPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLMTokenizer, XLMModel
|
||||
import torch
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = XLMModel.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -632,6 +623,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
|
||||
return {"input_ids": input_ids, "langs": langs}
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -672,18 +664,6 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLMTokenizer, XLMWithLMHeadModel
|
||||
import torch
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
@@ -722,6 +702,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -761,19 +742,6 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLMTokenizer, XLMForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
@@ -822,6 +790,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -867,20 +836,6 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLMTokenizer, XLMForQuestionAnsweringSimple
|
||||
import torch
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
start_positions = torch.tensor([1])
|
||||
end_positions = torch.tensor([3])
|
||||
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||
loss = outputs[0]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
@@ -1006,19 +961,20 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
Example::
|
||||
|
||||
from transformers import XLMTokenizer, XLMForQuestionAnswering
|
||||
import torch
|
||||
>>> from transformers import XLMTokenizer, XLMForQuestionAnswering
|
||||
>>> import torch
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
start_positions = torch.tensor([1])
|
||||
end_positions = torch.tensor([3])
|
||||
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||
loss = outputs[0]
|
||||
>>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
|
||||
>>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
|
||||
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
>>> start_positions = torch.tensor([1])
|
||||
>>> end_positions = torch.tensor([3])
|
||||
|
||||
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||
>>> loss = outputs[0]
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
@@ -1067,6 +1023,7 @@ class XLMForTokenClassification(XLMPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING)
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1074,6 +1031,8 @@ class XLMForTokenClassification(XLMPreTrainedModel):
|
||||
langs=None,
|
||||
token_type_ids=None,
|
||||
position_ids=None,
|
||||
lengths=None,
|
||||
cache=None,
|
||||
head_mask=None,
|
||||
labels=None,
|
||||
output_attentions=None,
|
||||
@@ -1101,19 +1060,6 @@ class XLMForTokenClassification(XLMPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLMTokenizer, XLMForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-100-1280')
|
||||
model = XLMForTokenClassification.from_pretrained('xlm-mlm-100-1280')
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, scores = outputs[:2]
|
||||
|
||||
"""
|
||||
outputs = self.transformer(
|
||||
input_ids,
|
||||
@@ -1121,6 +1067,8 @@ class XLMForTokenClassification(XLMPreTrainedModel):
|
||||
langs=langs,
|
||||
token_type_ids=token_type_ids,
|
||||
position_ids=position_ids,
|
||||
lengths=lengths,
|
||||
cache=cache,
|
||||
head_mask=head_mask,
|
||||
output_attentions=output_attentions,
|
||||
output_hidden_states=output_hidden_states,
|
||||
|
||||
@@ -26,12 +26,14 @@ from torch.nn import functional as F
|
||||
|
||||
from .activations import gelu_new, swish
|
||||
from .configuration_xlnet import XLNetConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_utils import PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits, PreTrainedModel, SequenceSummary
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TOKENIZER_FOR_DOC = "XLNetTokenizer"
|
||||
|
||||
XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"xlnet-base-cased",
|
||||
"xlnet-large-cased",
|
||||
@@ -749,6 +751,7 @@ class XLNetModel(XLNetPreTrainedModel):
|
||||
return pos_emb
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -785,20 +788,6 @@ class XLNetModel(XLNetPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLNetTokenizer, XLNetModel
|
||||
import torch
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
|
||||
model = XLNetModel.from_pretrained('xlnet-large-cased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=False)).unsqueeze(0) # Batch size 1
|
||||
|
||||
outputs = model(input_ids)
|
||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -1164,6 +1153,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1208,20 +1198,6 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLNetTokenizer, XLNetForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
|
||||
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
@@ -1273,6 +1249,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1316,21 +1293,6 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLNetTokenizer, XLNetForTokenClassification
|
||||
import torch
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
|
||||
model = XLNetForTokenClassification.from_pretrained('xlnet-large-cased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
|
||||
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, labels=labels)
|
||||
|
||||
scores = outputs[0]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.transformer(
|
||||
@@ -1386,6 +1348,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1431,22 +1394,6 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLNetTokenizer, XLNetForMultipleChoice
|
||||
import torch
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
|
||||
model = XLNetForMultipleChoice.from_pretrained('xlnet-base-cased')
|
||||
|
||||
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
|
||||
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
|
||||
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, classification_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
|
||||
@@ -1508,6 +1455,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased")
|
||||
def forward(
|
||||
self,
|
||||
input_ids=None,
|
||||
@@ -1558,22 +1506,6 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import XLNetTokenizer, XLNetForQuestionAnsweringSimple
|
||||
import torch
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
|
||||
model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
start_positions = torch.tensor([1])
|
||||
end_positions = torch.tensor([3])
|
||||
|
||||
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||
loss = outputs[0]
|
||||
|
||||
"""
|
||||
|
||||
outputs = self.transformer(
|
||||
@@ -1705,20 +1637,20 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
|
||||
heads.
|
||||
|
||||
Examples::
|
||||
Example::
|
||||
|
||||
from transformers import XLNetTokenizer, XLNetForQuestionAnswering
|
||||
import torch
|
||||
>>> from transformers import XLNetTokenizer, XLNetForQuestionAnswering
|
||||
>>> import torch
|
||||
|
||||
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
|
||||
model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased')
|
||||
>>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
|
||||
>>> model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased')
|
||||
|
||||
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
start_positions = torch.tensor([1])
|
||||
end_positions = torch.tensor([3])
|
||||
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||
loss = outputs[0]
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
>>> start_positions = torch.tensor([1])
|
||||
>>> end_positions = torch.tensor([3])
|
||||
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||
|
||||
>>> loss = outputs[0]
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
|
||||
@@ -66,13 +66,15 @@ class MBartTokenizer(XLMRobertaTokenizer):
|
||||
The tokenization method is <tokens> <eos> <language code>. There is no BOS token.
|
||||
|
||||
Examples::
|
||||
from transformers import MBartTokenizer
|
||||
tokenizer = MBartTokenizer.from_pretrained('mbart-large-en-ro')
|
||||
example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
|
||||
expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
|
||||
batch: dict = tokenizer.prepare_translation_batch(
|
||||
example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian
|
||||
)
|
||||
|
||||
>>> from transformers import MBartTokenizer
|
||||
>>> tokenizer = MBartTokenizer.from_pretrained('facebook/mbart-large-en-ro')
|
||||
>>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
|
||||
>>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
|
||||
>>> batch: dict = tokenizer.prepare_translation_batch(
|
||||
... example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian
|
||||
... )
|
||||
|
||||
"""
|
||||
|
||||
vocab_files_names = {"vocab_file": "sentencepiece.bpe.model"}
|
||||
|
||||
@@ -25,13 +25,13 @@ class MarianTokenizer(PreTrainedTokenizer):
|
||||
|
||||
Examples::
|
||||
|
||||
from transformers import MarianTokenizer
|
||||
tok = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de')
|
||||
src_texts = [ "I am a small frog.", "Tom asked his teacher for advice."]
|
||||
tgt_texts = ["Ich bin ein kleiner Frosch.", "Tom bat seinen Lehrer um Rat."] # optional
|
||||
batch_enc: BatchEncoding = tok.prepare_translation_batch(src_texts, tgt_texts=tgt_texts)
|
||||
# keys [input_ids, attention_mask, decoder_input_ids, decoder_attention_mask].
|
||||
# model(**batch) should work
|
||||
>>> from transformers import MarianTokenizer
|
||||
>>> tok = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de')
|
||||
>>> src_texts = [ "I am a small frog.", "Tom asked his teacher for advice."]
|
||||
>>> tgt_texts = ["Ich bin ein kleiner Frosch.", "Tom bat seinen Lehrer um Rat."] # optional
|
||||
>>> batch_enc: BatchEncoding = tok.prepare_translation_batch(src_texts, tgt_texts=tgt_texts)
|
||||
>>> # keys [input_ids, attention_mask, decoder_input_ids, decoder_attention_mask].
|
||||
>>> # model(**batch) should work
|
||||
"""
|
||||
|
||||
vocab_files_names = vocab_files_names
|
||||
|
||||
@@ -81,6 +81,7 @@ class ReformerTokenizer(PreTrainedTokenizer):
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
model_input_names = ["attention_mask"]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
||||
@@ -94,6 +94,7 @@ class T5Tokenizer(PreTrainedTokenizer):
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
model_input_names = ["attention_mask"]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
||||
@@ -13,52 +13,19 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import doctest
|
||||
import logging
|
||||
import os
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from typing import List, Union
|
||||
|
||||
import transformers
|
||||
|
||||
from .utils import require_tf, require_torch, slow
|
||||
|
||||
|
||||
def get_examples_from_file(file):
    """
    Extract the code examples that follow an ``Example::`` / ``Examples::`` marker.

    An example starts on the line after the marker and stops at the first non-empty line that is back at the
    marker's indentation, at a line containing a triple double quote (end of a docstring), or at the end of
    the input.

    Args:
        file: Iterable of text lines, for instance an open text file.

    Returns:
        :obj:`List[str]`: One string per example, its lines joined by newlines. Examples containing the text
        "not runnable" (case-insensitive) are left out.
    """
    examples = []
    example = []
    example_mode = False
    example_indentation = None

    for line in file:
        # The slicing and indentation arithmetic below assume a trailing newline; a last line without one
        # would otherwise lose its final character.
        if not line.endswith("\n"):
            line += "\n"
        lowered = line.lower()

        if example_mode:
            # Surrounding whitespace of the line, not counting the newline.
            current_indentation = len(line) - len(line.strip()) - 1

            # Check if the indentation is 0 for the example, so that we don't exit as soon as there's a line return.
            empty_line = example_indentation == 0 and len(line) == 1

            # If we're back to the example indentation or if it's the end of the docstring.
            if (current_indentation == example_indentation and not empty_line) or '"""' in line:
                # Exit the example mode and add the example to the examples list
                examples.append(example)
                example = []
                example_mode = False
                example_indentation = None
            elif line != "\n":
                # Example code sits four columns deeper than the marker; the newline is dropped as well.
                example.append(line[example_indentation + 4 : -1])

        # Detect the start of an example. This also runs on the line that just closed an example, so a new
        # marker on that same line is honoured.
        for marker in ("example::", "examples::"):
            position = lowered.find(marker)
            if position != -1:
                example_mode = True
                example_indentation = position
                break

    # The input ended while an example was still open: keep what was collected instead of dropping it.
    if example_mode and example:
        examples.append(example)

    examples = ["\n".join(example) for example in examples]
    return [example for example in examples if "not runnable" not in example.lower()]
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
@require_torch
|
||||
@@ -66,68 +33,81 @@ def get_examples_from_file(file):
|
||||
@slow
|
||||
class TestCodeExamples(unittest.TestCase):
|
||||
def analyze_directory(
    self,
    directory: Path,
    identifier: Union[str, None] = None,
    ignore_files: Union[List[str], None] = None,
    n_identifier: Union[str, List[str], None] = None,
    only_modules: bool = True,
):
    """
    Runs through the specific directory, looking for the files identified with `identifier`. Executes
    the doctests in those files

    Args:
        directory (:obj:`Path` or :obj:`str`): Directory containing the files
        identifier (:obj:`str`): Will parse files containing this
        ignore_files (:obj:`List[str]`): List of files to skip. ``__init__.py`` is always skipped. The list
            passed in is not modified.
        n_identifier (:obj:`str` or :obj:`List[str]`): Will not parse files containing this/these identifiers.
        only_modules (:obj:`bool`): Whether to only analyze modules
    """
    # Work on a private copy: appending to the argument would leak "__init__.py" into the caller's list
    # (or into a shared default) and would fail when ``None`` is passed.
    ignored = set(ignore_files or [])
    ignored.add("__init__.py")

    if n_identifier is None:
        excluded_identifiers = []
    elif isinstance(n_identifier, str):
        excluded_identifiers = [n_identifier]
    else:
        excluded_identifiers = list(n_identifier)

    files = [
        file
        for file in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, file))
        and (identifier is None or identifier in file)
        and not any(excluded in file for excluded in excluded_identifiers)
        and file not in ignored
    ]

    for file in files:
        print("Testing", file)

        if only_modules:
            module_name = file.split(".")[0]
            # Only the attribute lookup is guarded, so an AttributeError raised while collecting or
            # running the doctests is not mistaken for "not a module".
            try:
                module = getattr(transformers, module_name)
            except AttributeError:
                logger.info("%s is not a module.", module_name)
                continue
            suite = doctest.DocTestSuite(module)
            result = unittest.TextTestRunner().run(suite)
            self.assertEqual(len(result.failures), 0)
        else:
            # Path("..") keeps this working when `directory` is given as a plain string.
            result = doctest.testfile(str(Path("..") / directory / file), optionflags=doctest.ELLIPSIS)
            self.assertEqual(result.failed, 0)
|
||||
|
||||
def test_modeling_examples(self):
|
||||
transformers_directory = "src/transformers"
|
||||
modeling_files = "modeling"
|
||||
files = "modeling"
|
||||
ignore_files = [
|
||||
"modeling_auto.py",
|
||||
"modeling_t5.py",
|
||||
"modeling_tf_auto.py",
|
||||
"modeling_utils.py",
|
||||
"modeling_tf_t5.py",
|
||||
"modeling_bart.py",
|
||||
"modeling_tf_utils.py",
|
||||
"modeling_ctrl.py",
|
||||
"modeling_tf_ctrl.py",
|
||||
]
|
||||
self.analyze_directory(transformers_directory, identifier=modeling_files, ignore_files=ignore_files)
|
||||
self.analyze_directory(transformers_directory, identifier=files, ignore_files=ignore_files)
|
||||
|
||||
def test_tokenization_examples(self):
    """Run the doctests of the ``src/transformers`` files whose name contains "tokenization"."""
    self.analyze_directory(Path("src/transformers"), identifier="tokenization")
|
||||
|
||||
def test_configuration_examples(self):
    """Run the doctests of the ``src/transformers`` files whose name contains "configuration"."""
    self.analyze_directory(Path("src/transformers"), identifier="configuration")
|
||||
|
||||
def test_remaining_examples(self):
    """Run the doctests of the ``src/transformers`` files not covered by the other example tests."""
    # Files whose name contains one of these identifiers are left out of this run.
    already_covered = ["configuration", "modeling", "tokenization"]
    self.analyze_directory(Path("src/transformers"), n_identifier=already_covered)
|
||||
|
||||
def test_doc_sources(self):
    """Run the files under ``docs/source`` as doctest files rather than as importable modules."""
    self.analyze_directory(
        Path("docs/source"),
        ignore_files=["favicon.ico"],
        only_modules=False,
    )
|
||||
|
||||
@@ -31,6 +31,7 @@ if is_tf_available():
|
||||
TFXLMWithLMHeadModel,
|
||||
TFXLMForSequenceClassification,
|
||||
TFXLMForQuestionAnsweringSimple,
|
||||
TFXLMForTokenClassification,
|
||||
TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
)
|
||||
|
||||
@@ -219,6 +220,26 @@ class TFXLMModelTester:
|
||||
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
|
||||
|
||||
def create_and_check_xlm_for_token_classification(
    self,
    config,
    input_ids,
    token_type_ids,
    input_lengths,
    sequence_labels,
    token_labels,
    is_impossible_labels,
    input_mask,
):
    """Check that the token-classification model returns logits shaped [batch_size, seq_length, num_labels]."""
    config.num_labels = self.num_labels
    model = TFXLMForTokenClassification(config=config)

    # The model is expected to return a one-element tuple holding the logits.
    (logits,) = model(
        {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
        }
    )

    expected_shape = [self.batch_size, self.seq_length, self.num_labels]
    self.parent.assertListEqual(list(logits.numpy().shape), expected_shape)
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
(
|
||||
@@ -244,7 +265,14 @@ class TFXLMModelTester:
|
||||
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
|
||||
all_model_classes = (
|
||||
(TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
|
||||
# TODO The multiple choice model is missing and should be added.
|
||||
(
|
||||
TFXLMModel,
|
||||
TFXLMWithLMHeadModel,
|
||||
TFXLMForSequenceClassification,
|
||||
TFXLMForQuestionAnsweringSimple,
|
||||
TFXLMForTokenClassification,
|
||||
)
|
||||
if is_tf_available()
|
||||
else ()
|
||||
)
|
||||
@@ -275,6 +303,10 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_xlm_sequence_classif(*config_and_inputs)
|
||||
|
||||
def test_for_token_classification(self):
    """Prepare a config and inputs with the model tester and run its token-classification check."""
    tester = self.model_tester
    prepared = tester.prepare_config_and_inputs()
    tester.create_and_check_xlm_for_token_classification(*prepared)
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
|
||||
Reference in New Issue
Block a user