Apply quality and style requirements
This commit is contained in:
committed by
Lysandre Debut
parent
a3998e76ae
commit
0731fa1587
@@ -29,10 +29,8 @@ from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
|
|||||||
from .configuration_mmbt import MMBTConfig
|
from .configuration_mmbt import MMBTConfig
|
||||||
from .configuration_openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig
|
from .configuration_openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig
|
||||||
from .configuration_roberta import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, RobertaConfig
|
from .configuration_roberta import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, RobertaConfig
|
||||||
from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
|
|
||||||
from .configuration_t5 import T5_PRETRAINED_CONFIG_ARCHIVE_MAP, T5Config
|
from .configuration_t5 import T5_PRETRAINED_CONFIG_ARCHIVE_MAP, T5Config
|
||||||
from .configuration_transfo_xl import TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, TransfoXLConfig
|
from .configuration_transfo_xl import TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, TransfoXLConfig
|
||||||
|
|
||||||
# Configurations
|
# Configurations
|
||||||
from .configuration_utils import PretrainedConfig
|
from .configuration_utils import PretrainedConfig
|
||||||
from .configuration_xlm import XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMConfig
|
from .configuration_xlm import XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMConfig
|
||||||
@@ -57,7 +55,6 @@ from .data import (
|
|||||||
xnli_processors,
|
xnli_processors,
|
||||||
xnli_tasks_num_labels,
|
xnli_tasks_num_labels,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Files and general utilities
|
# Files and general utilities
|
||||||
from .file_utils import (
|
from .file_utils import (
|
||||||
CONFIG_NAME,
|
CONFIG_NAME,
|
||||||
@@ -74,10 +71,8 @@ from .file_utils import (
|
|||||||
is_tf_available,
|
is_tf_available,
|
||||||
is_torch_available,
|
is_torch_available,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Model Cards
|
# Model Cards
|
||||||
from .modelcard import ModelCard
|
from .modelcard import ModelCard
|
||||||
|
|
||||||
# TF 2.0 <=> PyTorch conversion utilities
|
# TF 2.0 <=> PyTorch conversion utilities
|
||||||
from .modeling_tf_pytorch_utils import (
|
from .modeling_tf_pytorch_utils import (
|
||||||
convert_tf_weight_name_to_pt_weight_name,
|
convert_tf_weight_name_to_pt_weight_name,
|
||||||
@@ -88,7 +83,6 @@ from .modeling_tf_pytorch_utils import (
|
|||||||
load_tf2_model_in_pytorch_model,
|
load_tf2_model_in_pytorch_model,
|
||||||
load_tf2_weights_in_pytorch_model,
|
load_tf2_weights_in_pytorch_model,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Pipelines
|
# Pipelines
|
||||||
from .pipelines import (
|
from .pipelines import (
|
||||||
CsvPipelineDataFormat,
|
CsvPipelineDataFormat,
|
||||||
@@ -114,7 +108,6 @@ from .tokenization_openai import OpenAIGPTTokenizer
|
|||||||
from .tokenization_roberta import RobertaTokenizer
|
from .tokenization_roberta import RobertaTokenizer
|
||||||
from .tokenization_t5 import T5Tokenizer
|
from .tokenization_t5 import T5Tokenizer
|
||||||
from .tokenization_transfo_xl import TransfoXLCorpus, TransfoXLTokenizer
|
from .tokenization_transfo_xl import TransfoXLCorpus, TransfoXLTokenizer
|
||||||
|
|
||||||
# Tokenizers
|
# Tokenizers
|
||||||
from .tokenization_utils import PreTrainedTokenizer
|
from .tokenization_utils import PreTrainedTokenizer
|
||||||
from .tokenization_xlm import XLMTokenizer
|
from .tokenization_xlm import XLMTokenizer
|
||||||
|
|||||||
@@ -22,12 +22,12 @@ import os
|
|||||||
from transformers import (
|
from transformers import (
|
||||||
ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
|
CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
|
||||||
T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
@@ -35,17 +35,18 @@ from transformers import (
|
|||||||
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||||
AlbertConfig,
|
AlbertConfig,
|
||||||
BertConfig,
|
BertConfig,
|
||||||
|
CamembertConfig,
|
||||||
CTRLConfig,
|
CTRLConfig,
|
||||||
DistilBertConfig,
|
DistilBertConfig,
|
||||||
GPT2Config,
|
GPT2Config,
|
||||||
OpenAIGPTConfig,
|
OpenAIGPTConfig,
|
||||||
RobertaConfig,
|
RobertaConfig,
|
||||||
CamembertConfig,
|
|
||||||
T5Config,
|
T5Config,
|
||||||
TFAlbertForMaskedLM,
|
TFAlbertForMaskedLM,
|
||||||
TFBertForPreTraining,
|
TFBertForPreTraining,
|
||||||
TFBertForQuestionAnswering,
|
TFBertForQuestionAnswering,
|
||||||
TFBertForSequenceClassification,
|
TFBertForSequenceClassification,
|
||||||
|
TFCamembertForMaskedLM,
|
||||||
TFCTRLLMHeadModel,
|
TFCTRLLMHeadModel,
|
||||||
TFDistilBertForMaskedLM,
|
TFDistilBertForMaskedLM,
|
||||||
TFDistilBertForQuestionAnswering,
|
TFDistilBertForQuestionAnswering,
|
||||||
@@ -53,8 +54,6 @@ from transformers import (
|
|||||||
TFOpenAIGPTLMHeadModel,
|
TFOpenAIGPTLMHeadModel,
|
||||||
TFRobertaForMaskedLM,
|
TFRobertaForMaskedLM,
|
||||||
TFRobertaForSequenceClassification,
|
TFRobertaForSequenceClassification,
|
||||||
TFCamembertForMaskedLM,
|
|
||||||
TFCamembertForSequenceClassification,
|
|
||||||
TFT5WithLMHeadModel,
|
TFT5WithLMHeadModel,
|
||||||
TFTransfoXLLMHeadModel,
|
TFTransfoXLLMHeadModel,
|
||||||
TFXLMRobertaForMaskedLM,
|
TFXLMRobertaForMaskedLM,
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
from .configuration_camembert import CamembertConfig
|
from .configuration_camembert import CamembertConfig
|
||||||
from .file_utils import add_start_docstrings
|
from .file_utils import add_start_docstrings
|
||||||
from .modeling_tf_roberta import (
|
from .modeling_tf_roberta import (
|
||||||
@@ -29,21 +27,22 @@ from .modeling_tf_roberta import (
|
|||||||
TFRobertaModel,
|
TFRobertaModel,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
|
TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
|
||||||
#"camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-tf_model.h5"
|
# "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-tf_model.h5"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
CAMEMBERT_START_DOCSTRING = r""" The CamemBERT model was proposed in
|
CAMEMBERT_START_DOCSTRING = r""" The CamemBERT model was proposed in
|
||||||
`CamemBERT: a Tasty French Language Model`_
|
`CamemBERT: a Tasty French Language Model`_
|
||||||
by Louis Martin, Benjamin Muller, Pedro Javier Ortiz Suárez, Yoann Dupont, Laurent Romary, Éric Villemonte de la Clergerie, Djamé Seddah, and Benoît Sagot. It is based on Facebook's RoBERTa model released in 2019.
|
by Louis Martin, Benjamin Muller, Pedro Javier Ortiz Suárez, Yoann Dupont, Laurent Romary, Éric Villemonte de la Clergerie, Djamé Seddah, and Benoît Sagot. It is based on Facebook's RoBERTa model released in 2019.
|
||||||
|
|
||||||
It is a model trained on 138GB of French text.
|
It is a model trained on 138GB of French text.
|
||||||
|
|
||||||
This implementation is the same as RoBERTa.
|
This implementation is the same as RoBERTa.
|
||||||
|
|
||||||
This model is a tf.keras.Model `tf.keras.Model`_ sub-class. Use it as a regular TF 2.0 Keras Model and
|
This model is a tf.keras.Model `tf.keras.Model`_ sub-class. Use it as a regular TF 2.0 Keras Model and
|
||||||
refer to the TF 2.0 documentation for all matter related to general usage and behavior.
|
refer to the TF 2.0 documentation for all matter related to general usage and behavior.
|
||||||
|
|
||||||
@@ -52,7 +51,7 @@ CAMEMBERT_START_DOCSTRING = r""" The CamemBERT model was proposed in
|
|||||||
|
|
||||||
.. _`tf.keras.Model`:
|
.. _`tf.keras.Model`:
|
||||||
https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/Model
|
https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/Model
|
||||||
|
|
||||||
Note on the model inputs:
|
Note on the model inputs:
|
||||||
TF 2.0 models accept two formats as inputs:
|
TF 2.0 models accept two formats as inputs:
|
||||||
|
|
||||||
@@ -60,15 +59,15 @@ CAMEMBERT_START_DOCSTRING = r""" The CamemBERT model was proposed in
|
|||||||
- having all inputs as a list, tuple or dict in the first positional arguments.
|
- having all inputs as a list, tuple or dict in the first positional arguments.
|
||||||
|
|
||||||
This second option is useful when using `tf.keras.Model.fit()` method which currently requires having all the tensors in the first argument of the model call function: `model(inputs)`.
|
This second option is useful when using `tf.keras.Model.fit()` method which currently requires having all the tensors in the first argument of the model call function: `model(inputs)`.
|
||||||
|
|
||||||
If you choose this second option, there are three possibilities you can use to gather all the input Tensors in the first positional argument :
|
If you choose this second option, there are three possibilities you can use to gather all the input Tensors in the first positional argument :
|
||||||
|
|
||||||
- a single Tensor with input_ids only and nothing else: `model(input_ids)`
|
- a single Tensor with input_ids only and nothing else: `model(input_ids)`
|
||||||
- a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
|
- a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
|
||||||
`model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
|
`model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
|
||||||
- a dictionary with one or several input Tensors associated with the input names given in the docstring:
|
- a dictionary with one or several input Tensors associated with the input names given in the docstring:
|
||||||
`model({'input_ids': input_ids, 'token_type_ids': token_type_ids})`
|
`model({'input_ids': input_ids, 'token_type_ids': token_type_ids})`
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
config (:class:`~transformers.CamembertConfig`): Model configuration class with all the parameters of the
|
config (:class:`~transformers.CamembertConfig`): Model configuration class with all the parameters of the
|
||||||
model. Initializing with a config file does not load the weights associated with the model, only the configuration.
|
model. Initializing with a config file does not load the weights associated with the model, only the configuration.
|
||||||
@@ -80,21 +79,21 @@ CAMEMBERT_INPUTS_DOCSTRING = r"""
|
|||||||
**input_ids**: ``Numpy array`` or ``tf.Tensor`` of shape ``(batch_size, sequence_length)``:
|
**input_ids**: ``Numpy array`` or ``tf.Tensor`` of shape ``(batch_size, sequence_length)``:
|
||||||
Indices of input sequence tokens in the vocabulary.
|
Indices of input sequence tokens in the vocabulary.
|
||||||
To match pre-training, CamemBERT input sequence should be formatted with <s> and </s> tokens as follows:
|
To match pre-training, CamemBERT input sequence should be formatted with <s> and </s> tokens as follows:
|
||||||
|
|
||||||
(a) For sequence pairs:
|
(a) For sequence pairs:
|
||||||
|
|
||||||
``tokens: <s> Is this Jacksonville ? </s> </s> No it is not . </s>``
|
``tokens: <s> Is this Jacksonville ? </s> </s> No it is not . </s>``
|
||||||
|
|
||||||
(b) For single sequences:
|
(b) For single sequences:
|
||||||
|
|
||||||
``tokens: <s> the dog is hairy . </s>``
|
``tokens: <s> the dog is hairy . </s>``
|
||||||
|
|
||||||
Fully encoded sequences or sequence pairs can be obtained using the CamembertTokenizer.encode function with
|
Fully encoded sequences or sequence pairs can be obtained using the CamembertTokenizer.encode function with
|
||||||
the ``add_special_tokens`` parameter set to ``True``.
|
the ``add_special_tokens`` parameter set to ``True``.
|
||||||
|
|
||||||
CamemBERT is a model with absolute position embeddings so it's usually advised to pad the inputs on
|
CamemBERT is a model with absolute position embeddings so it's usually advised to pad the inputs on
|
||||||
the right rather than the left.
|
the right rather than the left.
|
||||||
|
|
||||||
See :func:`transformers.PreTrainedTokenizer.encode` and
|
See :func:`transformers.PreTrainedTokenizer.encode` and
|
||||||
:func:`transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details.
|
:func:`transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details.
|
||||||
**attention_mask**: (`optional`) ``Numpy array`` or ``tf.Tensor`` of shape ``(batch_size, sequence_length)``:
|
**attention_mask**: (`optional`) ``Numpy array`` or ``tf.Tensor`` of shape ``(batch_size, sequence_length)``:
|
||||||
@@ -137,19 +136,19 @@ class TFCamembertModel(TFRobertaModel):
|
|||||||
further processed by a Linear layer and a Tanh activation function. The Linear
|
further processed by a Linear layer and a Tanh activation function. The Linear
|
||||||
layer weights are trained from the next sentence prediction (classification)
|
layer weights are trained from the next sentence prediction (classification)
|
||||||
To match pre-training, CamemBERT input sequence should be formatted with [CLS] and [SEP] tokens as follows:
|
To match pre-training, CamemBERT input sequence should be formatted with [CLS] and [SEP] tokens as follows:
|
||||||
|
|
||||||
(a) For sequence pairs:
|
(a) For sequence pairs:
|
||||||
|
|
||||||
``tokens: [CLS] is this jack ##son ##ville ? [SEP] [SEP] no it is not . [SEP]``
|
``tokens: [CLS] is this jack ##son ##ville ? [SEP] [SEP] no it is not . [SEP]``
|
||||||
|
|
||||||
``token_type_ids: 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1``
|
``token_type_ids: 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1``
|
||||||
|
|
||||||
(b) For single sequences:
|
(b) For single sequences:
|
||||||
|
|
||||||
``tokens: [CLS] the dog is hairy . [SEP]``
|
``tokens: [CLS] the dog is hairy . [SEP]``
|
||||||
|
|
||||||
``token_type_ids: 0 0 0 0 0 0 0``
|
``token_type_ids: 0 0 0 0 0 0 0``
|
||||||
|
|
||||||
objective during Bert pretraining. This output is usually *not* a good summary
|
objective during Bert pretraining. This output is usually *not* a good summary
|
||||||
of the semantic content of the input, you're often better with averaging or pooling
|
of the semantic content of the input, you're often better with averaging or pooling
|
||||||
the sequence of hidden-states for the whole input sequence.
|
the sequence of hidden-states for the whole input sequence.
|
||||||
@@ -160,15 +159,15 @@ class TFCamembertModel(TFRobertaModel):
|
|||||||
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
||||||
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
|
|
||||||
Examples::
|
Examples::
|
||||||
|
|
||||||
tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
|
tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
|
||||||
model = TFCamembertModel.from_pretrained('camembert-base')
|
model = TFCamembertModel.from_pretrained('camembert-base')
|
||||||
input_ids = tf.constant(tokenizer.encode("J'aime le camembert !"))[None, :] # Batch size 1
|
input_ids = tf.constant(tokenizer.encode("J'aime le camembert !"))[None, :] # Batch size 1
|
||||||
outputs = model(input_ids)
|
outputs = model(input_ids)
|
||||||
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
||||||
|
|
||||||
"""
|
"""
|
||||||
config_class = CamembertConfig
|
config_class = CamembertConfig
|
||||||
pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
@@ -186,7 +185,7 @@ class TFCamembertForMaskedLM(TFRobertaForMaskedLM):
|
|||||||
Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
|
Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
|
||||||
Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels
|
Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels
|
||||||
in ``[0, ..., config.vocab_size]``
|
in ``[0, ..., config.vocab_size]``
|
||||||
|
|
||||||
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||||
**loss**: (`optional`, returned when ``masked_lm_labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
**loss**: (`optional`, returned when ``masked_lm_labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
||||||
Masked language modeling loss.
|
Masked language modeling loss.
|
||||||
@@ -199,15 +198,15 @@ class TFCamembertForMaskedLM(TFRobertaForMaskedLM):
|
|||||||
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
||||||
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
|
|
||||||
Examples::
|
Examples::
|
||||||
|
|
||||||
tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
|
tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
|
||||||
model = TFCamembertForMaskedLM.from_pretrained('camembert-base')
|
model = TFCamembertForMaskedLM.from_pretrained('camembert-base')
|
||||||
input_ids = tf.constant(tokenizer.encode("J'aime le camembert !"))[None, :] # Batch size 1
|
input_ids = tf.constant(tokenizer.encode("J'aime le camembert !"))[None, :] # Batch size 1
|
||||||
outputs = model(input_ids, masked_lm_labels=input_ids)
|
outputs = model(input_ids, masked_lm_labels=input_ids)
|
||||||
loss, prediction_scores = outputs[:2]
|
loss, prediction_scores = outputs[:2]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
config_class = CamembertConfig
|
config_class = CamembertConfig
|
||||||
pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
@@ -226,7 +225,7 @@ class TFCamembertForSequenceClassification(TFRobertaForSequenceClassification):
|
|||||||
Indices should be in ``[0, ..., config.num_labels]``.
|
Indices should be in ``[0, ..., config.num_labels]``.
|
||||||
If ``config.num_labels == 1`` a regression loss is computed (Mean-Square loss),
|
If ``config.num_labels == 1`` a regression loss is computed (Mean-Square loss),
|
||||||
If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy).
|
If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy).
|
||||||
|
|
||||||
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||||
**loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
**loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
||||||
Classification (or regression if config.num_labels==1) loss.
|
Classification (or regression if config.num_labels==1) loss.
|
||||||
@@ -239,15 +238,15 @@ class TFCamembertForSequenceClassification(TFRobertaForSequenceClassification):
|
|||||||
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
||||||
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
|
|
||||||
Examples::
|
Examples::
|
||||||
|
|
||||||
tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
|
tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
|
||||||
model = TFCamembertForSequenceClassification.from_pretrained('camembert-base')
|
model = TFCamembertForSequenceClassification.from_pretrained('camembert-base')
|
||||||
input_ids = tf.constant(tokenizer.encode("J'aime le camembert !"))[None, :] # Batch size 1
|
input_ids = tf.constant(tokenizer.encode("J'aime le camembert !"))[None, :] # Batch size 1
|
||||||
outputs = model(input_ids)
|
outputs = model(input_ids)
|
||||||
loss, logits = outputs[:2]
|
loss, logits = outputs[:2]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
config_class = CamembertConfig
|
config_class = CamembertConfig
|
||||||
pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
@@ -264,7 +263,7 @@ class TFCamembertForTokenClassification(TFRobertaForTokenClassification):
|
|||||||
**labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
**labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
||||||
Labels for computing the token classification loss.
|
Labels for computing the token classification loss.
|
||||||
Indices should be in ``[0, ..., config.num_labels - 1]``.
|
Indices should be in ``[0, ..., config.num_labels - 1]``.
|
||||||
|
|
||||||
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||||
**loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
**loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
||||||
Classification loss.
|
Classification loss.
|
||||||
@@ -277,15 +276,15 @@ class TFCamembertForTokenClassification(TFRobertaForTokenClassification):
|
|||||||
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
||||||
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
|
|
||||||
Examples::
|
Examples::
|
||||||
|
|
||||||
tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
|
tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
|
||||||
model = TFCamembertForTokenClassification.from_pretrained('camembert-base')
|
model = TFCamembertForTokenClassification.from_pretrained('camembert-base')
|
||||||
input_ids = tf.constant(tokenizer.encode("J'aime le camembert !", add_special_tokens=True))[None, :] # Batch size 1
|
input_ids = tf.constant(tokenizer.encode("J'aime le camembert !", add_special_tokens=True))[None, :] # Batch size 1
|
||||||
outputs = model(input_ids)
|
outputs = model(input_ids)
|
||||||
loss, scores = outputs[:2]
|
loss, scores = outputs[:2]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
config_class = CamembertConfig
|
config_class = CamembertConfig
|
||||||
pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
|||||||
@@ -52,7 +52,6 @@ from utils_squad import (
|
|||||||
write_predictions,
|
write_predictions,
|
||||||
write_predictions_extended,
|
write_predictions_extended,
|
||||||
)
|
)
|
||||||
|
|
||||||
# The following import is the official SQuAD evaluation script (2.0).
|
# The following import is the official SQuAD evaluation script (2.0).
|
||||||
# You can remove it from the dependencies if you are using this script outside of the library
|
# You can remove it from the dependencies if you are using this script outside of the library
|
||||||
# We've added it here for automated tests (see examples/test_examples.py file)
|
# We've added it here for automated tests (see examples/test_examples.py file)
|
||||||
@@ -333,7 +332,8 @@ def evaluate(args, model, tokenizer, prefix=""):
|
|||||||
|
|
||||||
def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
|
def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
|
||||||
if args.local_rank not in [-1, 0] and not evaluate:
|
if args.local_rank not in [-1, 0] and not evaluate:
|
||||||
torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache
|
torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset,
|
||||||
|
# and the others will use the cache
|
||||||
|
|
||||||
# Load data features from cache or dataset file
|
# Load data features from cache or dataset file
|
||||||
input_file = args.predict_file if evaluate else args.train_file
|
input_file = args.predict_file if evaluate else args.train_file
|
||||||
@@ -366,7 +366,8 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
|
|||||||
torch.save(features, cached_features_file)
|
torch.save(features, cached_features_file)
|
||||||
|
|
||||||
if args.local_rank == 0 and not evaluate:
|
if args.local_rank == 0 and not evaluate:
|
||||||
torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache
|
torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset,
|
||||||
|
# and the others will use the cache
|
||||||
|
|
||||||
# Convert to Tensors and build dataset
|
# Convert to Tensors and build dataset
|
||||||
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
|
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
|
||||||
@@ -620,7 +621,8 @@ def main():
|
|||||||
|
|
||||||
# Load pretrained model and tokenizer
|
# Load pretrained model and tokenizer
|
||||||
if args.local_rank not in [-1, 0]:
|
if args.local_rank not in [-1, 0]:
|
||||||
torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab
|
torch.distributed.barrier() # Make sure only the first process in distributed training will
|
||||||
|
# download model & vocab
|
||||||
|
|
||||||
args.model_type = args.model_type.lower()
|
args.model_type = args.model_type.lower()
|
||||||
config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
|
config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
|
||||||
@@ -641,15 +643,16 @@ def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
if args.local_rank == 0:
|
if args.local_rank == 0:
|
||||||
torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab
|
torch.distributed.barrier() # Make sure only the first process in distributed training will
|
||||||
|
# download model & vocab
|
||||||
|
|
||||||
model.to(args.device)
|
model.to(args.device)
|
||||||
|
|
||||||
logger.info("Training/evaluation parameters %s", args)
|
logger.info("Training/evaluation parameters %s", args)
|
||||||
|
|
||||||
# Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
|
# Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum
|
||||||
# Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
|
# if args.fp16 is set. Otherwise it'll default to "promote" mode, and we'll get fp32 operations.
|
||||||
# remove the need for this code, but it is still valid.
|
# Note that running `--fp16_opt_level="O2"` will remove the need for this code, but it is still valid.
|
||||||
if args.fp16:
|
if args.fp16:
|
||||||
try:
|
try:
|
||||||
import apex
|
import apex
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ import logging
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
from transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize
|
from transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize
|
||||||
|
|
||||||
# Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method)
|
# Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method)
|
||||||
from utils_squad_evaluate import find_all_best_thresh_v2, get_raw_scores, make_qid_to_has_ans
|
from utils_squad_evaluate import find_all_best_thresh_v2, get_raw_scores, make_qid_to_has_ans
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user