From 026a2ff225afec108dbba2b747b07bf5cb5490e5 Mon Sep 17 00:00:00 2001 From: "Ratthachat (Jung)" <56621342+ratthachat@users.noreply.github.com> Date: Thu, 12 Nov 2020 00:28:09 +0700 Subject: [PATCH] Add TFDPR (#8203) * Create modeling_tf_dpr.py * Add TFDPR * Add back TFPegasus, TFMarian, TFMBart, TFBlenderBot last commit accidentally deleted these 4 lines, so I recover them back * Add TFDPR * Add TFDPR * clean up some comments, add TF input-style doc string * Add TFDPR * Make return_dict=False as default * Fix return_dict bug (in .from_pretrained) * Add get_input_embeddings() * Create test_modeling_tf_dpr.py The current version is already passed all 27 tests! Please see the test run at : https://colab.research.google.com/drive/1czS_m9zy5k-iSJbzA_DP1k1xAAC_sdkf?usp=sharing * fix quality * delete init weights * run fix copies * fix repo consis * del config_class, load_tf_weights They should be 'pytorch only' * add config_class back after removing it, test failed ... so totally only removing "use_tf_weights = None" on Lysandre suggestion * newline after .. note:: * import tf, np (Necessary for ModelIntegrationTest) * slow_test from_pretrained with from_pt=True At the moment we don't have TF weights (since we don't have an official TF model) Previously, I did not run slow test, so I missed this bug * Add simple TFDPRModelIntegrationTest Note that this is just a test that TF and Pytorch gives approx. the same output. 
However, I could not test with the official DPR repo's output yet * upload correct tf model * remove position_ids as missing keys Co-authored-by: Patrick von Platen Co-authored-by: patrickvonplaten --- docs/source/model_doc/dpr.rst | 19 + src/transformers/__init__.py | 14 + .../convert_pytorch_checkpoint_to_tf2.py | 22 + src/transformers/modeling_tf_auto.py | 3 + src/transformers/modeling_tf_dpr.py | 724 ++++++++++++++++++ src/transformers/utils/dummy_pt_objects.py | 9 + src/transformers/utils/dummy_tf_objects.py | 39 + tests/test_modeling_dpr.py | 35 + tests/test_modeling_tf_dpr.py | 260 +++++++ utils/check_repo.py | 8 + 10 files changed, 1133 insertions(+) create mode 100644 src/transformers/modeling_tf_dpr.py create mode 100644 tests/test_modeling_tf_dpr.py diff --git a/docs/source/model_doc/dpr.rst b/docs/source/model_doc/dpr.rst index f1c465f386..a9e6fdf7a8 100644 --- a/docs/source/model_doc/dpr.rst +++ b/docs/source/model_doc/dpr.rst @@ -99,3 +99,22 @@ DPRReader .. autoclass:: transformers.DPRReader :members: forward + +TFDPRContextEncoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: transformers.TFDPRContextEncoder + :members: call + +TFDPRQuestionEncoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: transformers.TFDPRQuestionEncoder + :members: call + + +TFDPRReader +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
autoclass:: transformers.TFDPRReader + :members: call diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index db493affbc..218bb5f03c 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -406,6 +406,9 @@ if is_torch_available(): DistilBertPreTrainedModel, ) from .modeling_dpr import ( + DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, + DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, + DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST, DPRContextEncoder, DPRPretrainedContextEncoder, DPRPretrainedQuestionEncoder, @@ -713,6 +716,17 @@ if is_tf_available(): TFDistilBertModel, TFDistilBertPreTrainedModel, ) + from .modeling_tf_dpr import ( + TF_DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, + TF_DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, + TF_DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST, + TFDPRContextEncoder, + TFDPRPretrainedContextEncoder, + TFDPRPretrainedQuestionEncoder, + TFDPRPretrainedReader, + TFDPRQuestionEncoder, + TFDPRReader, + ) from .modeling_tf_electra import ( TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST, TFElectraForMaskedLM, diff --git a/src/transformers/convert_pytorch_checkpoint_to_tf2.py b/src/transformers/convert_pytorch_checkpoint_to_tf2.py index 5e785b5d9b..5447ede65e 100755 --- a/src/transformers/convert_pytorch_checkpoint_to_tf2.py +++ b/src/transformers/convert_pytorch_checkpoint_to_tf2.py @@ -25,6 +25,9 @@ from transformers import ( CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, + DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, + DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, + DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, @@ -43,6 +46,7 @@ from transformers import ( CamembertConfig, CTRLConfig, DistilBertConfig, + DPRConfig, ElectraConfig, FlaubertConfig, GPT2Config, @@ -59,6 +63,9 @@ from transformers 
import ( TFCTRLLMHeadModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering, + TFDPRContextEncoder, + TFDPRQuestionEncoder, + TFDPRReader, TFElectraForPreTraining, TFFlaubertWithLMHeadModel, TFGPT2LMHeadModel, @@ -98,6 +105,9 @@ if is_torch_available(): CTRLLMHeadModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering, + DPRContextEncoder, + DPRQuestionEncoder, + DPRReader, ElectraForPreTraining, FlaubertWithLMHeadModel, GPT2LMHeadModel, @@ -147,6 +157,18 @@ MODEL_CLASSES = { BertForSequenceClassification, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), + "dpr": ( + DPRConfig, + TFDPRQuestionEncoder, + TFDPRContextEncoder, + TFDPRReader, + DPRQuestionEncoder, + DPRContextEncoder, + DPRReader, + DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, + DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, + DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST, + ), "gpt2": ( GPT2Config, TFGPT2LMHeadModel, diff --git a/src/transformers/modeling_tf_auto.py b/src/transformers/modeling_tf_auto.py index dcf694e339..5d16423a9e 100644 --- a/src/transformers/modeling_tf_auto.py +++ b/src/transformers/modeling_tf_auto.py @@ -43,6 +43,7 @@ from .configuration_auto import ( replace_list_option_in_docstrings, ) from .configuration_blenderbot import BlenderbotConfig +from .configuration_dpr import DPRConfig from .configuration_marian import MarianConfig from .configuration_mbart import MBartConfig from .configuration_pegasus import PegasusConfig @@ -87,6 +88,7 @@ from .modeling_tf_distilbert import ( TFDistilBertForTokenClassification, TFDistilBertModel, ) +from .modeling_tf_dpr import TFDPRQuestionEncoder from .modeling_tf_electra import ( TFElectraForMaskedLM, TFElectraForMultipleChoice, @@ -192,6 +194,7 @@ TF_MODEL_MAPPING = OrderedDict( (CTRLConfig, TFCTRLModel), (ElectraConfig, TFElectraModel), (FunnelConfig, TFFunnelModel), + (DPRConfig, TFDPRQuestionEncoder), ] ) diff --git a/src/transformers/modeling_tf_dpr.py b/src/transformers/modeling_tf_dpr.py new file mode 100644 index 
0000000000..1b4b4f5bb5 --- /dev/null +++ b/src/transformers/modeling_tf_dpr.py @@ -0,0 +1,724 @@ +# coding=utf-8 +# Copyright 2018 DPR Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" TensorFlow DPR model for Open Domain Question Answering.""" + + +from dataclasses import dataclass +from typing import Optional, Tuple, Union + +import tensorflow as tf +from tensorflow import Tensor +from tensorflow.keras.layers import Dense + +from .configuration_dpr import DPRConfig +from .file_utils import ( + ModelOutput, + add_start_docstrings, + add_start_docstrings_to_model_forward, + replace_return_docstrings, +) +from .modeling_tf_bert import TFBertMainLayer +from .modeling_tf_outputs import TFBaseModelOutputWithPooling +from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list +from .tokenization_utils import BatchEncoding +from .utils import logging + + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "DPRConfig" + +TF_DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "facebook/dpr-ctx_encoder-single-nq-base", + "facebook/dpr-ctx_encoder-multiset-base", +] +TF_DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "facebook/dpr-question_encoder-single-nq-base", + "facebook/dpr-question_encoder-multiset-base", +] +TF_DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "facebook/dpr-reader-single-nq-base", + "facebook/dpr-reader-multiset-base", +] + + +########## +# Outputs +########## + + +@dataclass +class 
TFDPRContextEncoderOutput(ModelOutput): + r""" + Class for outputs of :class:`~transformers.TFDPRContextEncoder`. + + Args: + pooler_output: (:obj:``tf.Tensor`` of shape ``(batch_size, embeddings_size)``): + The DPR encoder outputs the `pooler_output` that corresponds to the context representation. Last layer + hidden-state of the first token of the sequence (classification token) further processed by a Linear layer. + This output is to be used to embed contexts for nearest neighbors queries with questions embeddings. + hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of + shape :obj:`(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`tf.Tensor` (one for each layer) of shape :obj:`(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + + pooler_output: tf.Tensor + hidden_states: Optional[Tuple[tf.Tensor]] = None + attentions: Optional[Tuple[tf.Tensor]] = None + + +@dataclass +class TFDPRQuestionEncoderOutput(ModelOutput): + """ + Class for outputs of :class:`~transformers.TFDPRQuestionEncoder`. + + Args: + pooler_output: (:obj:``tf.Tensor`` of shape ``(batch_size, embeddings_size)``): + The DPR encoder outputs the `pooler_output` that corresponds to the question representation. Last layer + hidden-state of the first token of the sequence (classification token) further processed by a Linear layer. 
+ This output is to be used to embed questions for nearest neighbors queries with context embeddings. + hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of + shape :obj:`(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`tf.Tensor` (one for each layer) of shape :obj:`(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + + pooler_output: tf.Tensor + hidden_states: Optional[Tuple[tf.Tensor]] = None + attentions: Optional[Tuple[tf.Tensor]] = None + + +@dataclass +class TFDPRReaderOutput(ModelOutput): + """ + Class for outputs of :class:`~transformers.TFDPRReader`. + + Args: + start_logits: (:obj:``tf.Tensor`` of shape ``(n_passages, sequence_length)``): + Logits of the start index of the span for each passage. + end_logits: (:obj:``tf.Tensor`` of shape ``(n_passages, sequence_length)``): + Logits of the end index of the span for each passage. + relevance_logits: (:obj:``tf.Tensor`` of shape ``(n_passages, )``): + Outputs of the QA classifier of the DPRReader that corresponds to the scores of each passage to answer the + question, compared to all the other passages. + hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): + Tuple of :obj:`tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of + shape :obj:`(batch_size, sequence_length, hidden_size)`. 
+ + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): + Tuple of :obj:`tf.Tensor` (one for each layer) of shape :obj:`(batch_size, num_heads, + sequence_length, sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + + start_logits: tf.Tensor + end_logits: tf.Tensor = None + relevance_logits: tf.Tensor = None + hidden_states: Optional[Tuple[tf.Tensor]] = None + attentions: Optional[Tuple[tf.Tensor]] = None + + +class TFDPREncoder(TFPreTrainedModel): + + base_model_prefix = "bert_model" + + def __init__(self, config: DPRConfig, *args, **kwargs): + super().__init__(config, *args, **kwargs) + + # resolve name conflict with TFBertMainLayer instead of TFBertModel + self.bert_model = TFBertMainLayer(config, name="bert_model") + self.bert_model.config = config + + assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero" + self.projection_dim = config.projection_dim + if self.projection_dim > 0: + self.encode_proj = Dense( + config.projection_dim, kernel_initializer=get_initializer(config.initializer_range), name="encode_proj" + ) + + def call( + self, + input_ids: Tensor, + attention_mask: Optional[Tensor] = None, + token_type_ids: Optional[Tensor] = None, + inputs_embeds: Optional[Tensor] = None, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = None, + training: bool = False, + ) -> Union[TFBaseModelOutputWithPooling, Tuple[Tensor, ...]]: + + return_dict = return_dict if return_dict is not None else self.bert_model.return_dict + + outputs = self.bert_model( + inputs=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + 
output_hidden_states=output_hidden_states, + return_dict=return_dict, + training=training, + ) + sequence_output, pooled_output = outputs[:2] + pooled_output = sequence_output[:, 0, :] + if self.projection_dim > 0: + pooled_output = self.encode_proj(pooled_output) + + if not return_dict: + return (sequence_output, pooled_output) + outputs[2:] + + return TFBaseModelOutputWithPooling( + last_hidden_state=sequence_output, + pooler_output=pooled_output, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + @property + def embeddings_size(self) -> int: + if self.projection_dim > 0: + return self.projection_dim + return self.bert_model.config.hidden_size + + +class TFDPRSpanPredictor(TFPreTrainedModel): + + base_model_prefix = "encoder" + + def __init__(self, config: DPRConfig, *args, **kwargs): + super().__init__(config, *args, **kwargs) + self.encoder = TFDPREncoder(config, name="encoder") + + self.qa_outputs = Dense(2, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs") + self.qa_classifier = Dense( + 1, kernel_initializer=get_initializer(config.initializer_range), name="qa_classifier" + ) + + def call( + self, + input_ids: Tensor, + attention_mask: Tensor, + inputs_embeds: Optional[Tensor] = None, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = False, + training: bool = False, + ) -> Union[TFDPRReaderOutput, Tuple[Tensor, ...]]: + # notations: N - number of questions in a batch, M - number of passages per questions, L - sequence length + n_passages, sequence_length = shape_list(input_ids) if input_ids is not None else shape_list(inputs_embeds)[:2] + # feed encoder + + outputs = self.encoder( + input_ids, + attention_mask=attention_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + training=training, + ) + sequence_output = outputs[0] + + # compute logits + logits = 
self.qa_outputs(sequence_output) + start_logits, end_logits = tf.split(logits, 2, axis=-1) + start_logits = tf.squeeze(start_logits, axis=-1) + end_logits = tf.squeeze(end_logits, axis=-1) + relevance_logits = self.qa_classifier(sequence_output[:, 0, :]) + + # resize + start_logits = tf.reshape(start_logits, [n_passages, sequence_length]) + end_logits = tf.reshape(end_logits, [n_passages, sequence_length]) + relevance_logits = tf.reshape(relevance_logits, [n_passages]) + + if not return_dict: + return (start_logits, end_logits, relevance_logits) + outputs[2:] + + return TFDPRReaderOutput( + start_logits=start_logits, + end_logits=end_logits, + relevance_logits=relevance_logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + +################## +# PreTrainedModel +################## + + +class TFDPRPretrainedContextEncoder(TFPreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. + """ + + config_class = DPRConfig + base_model_prefix = "ctx_encoder" + + +class TFDPRPretrainedQuestionEncoder(TFPreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. + """ + + config_class = DPRConfig + base_model_prefix = "question_encoder" + + +class TFDPRPretrainedReader(TFPreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. + """ + + config_class = DPRConfig + base_model_prefix = "reader" + + +############### +# Actual Models +############### + + +TF_DPR_START_DOCSTRING = r""" + + This model inherits from :class:`~transformers.TFPreTrainedModel`. Check the superclass documentation for the + generic methods the library implements for all its model (such as downloading or saving, resizing the input + embeddings, pruning heads etc.) 
+ + This model is also a Tensorflow `tf.keras.Model <https://www.tensorflow.org/api_docs/python/tf/keras/Model>`__ + subclass. Use it as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matters related to + general usage and behavior. + + .. note:: + + TF 2.0 models accept two formats as inputs: - having all inputs as keyword arguments (like PyTorch models), or + - having all inputs as a list, tuple or dict in the first positional arguments. This second option is useful + when using :meth:`tf.keras.Model.fit` method which currently requires having all the tensors in the first + argument of the model call function: :obj:`model(inputs)`. If you choose this second option, there are three + possibilities you can use to gather all the input Tensors in the first positional argument : - a single Tensor + with :obj:`input_ids` only and nothing else: :obj:`model(input_ids)` - a list of varying length with one or + several input Tensors IN THE ORDER given in the docstring: :obj:`model([input_ids, attention_mask])` or + :obj:`model([input_ids, attention_mask, token_type_ids])` - a dictionary with one or several input Tensors + associated to the input names given in the docstring: :obj:`model({"input_ids": input_ids, "token_type_ids": + token_type_ids})` + + Parameters: + config (:class:`~transformers.DPRConfig`): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the :meth:`~transformers.TFPreTrainedModel.from_pretrained` method to load the + model weights. +""" + +TF_DPR_ENCODERS_INPUTS_DOCSTRING = r""" + Args: + input_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. 
To match pretraining, DPR input sequence should be + formatted with [CLS] and [SEP] tokens as follows: + + (a) For sequence pairs (for a pair title+text for example): + + ``tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]`` + + ``token_type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1`` + + (b) For single sequences (for a question for example): + + ``tokens: [CLS] the dog is hairy . [SEP]`` + + ``token_type_ids: 0 0 0 0 0 0 0`` + + DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right + rather than the left. + + Indices can be obtained using :class:`~transformers.DPRTokenizer`. See + :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for + details. + attention_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + `What are attention masks? <../glossary.html#attention-mask>`__ + token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, + 1]``: + + - 0 corresponds to a `sentence A` token, + - 1 corresponds to a `sentence B` token. + + `What are token type IDs? <../glossary.html#token-type-ids>`_ + inputs_embeds (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): + Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. + This is useful if you want more control over how to convert :obj:`input_ids` indices into associated + vectors than the model's internal embedding lookup matrix. 
+ output_attentions (:obj:`bool`, `optional`): + Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned + tensors for more detail. + output_hidden_states (:obj:`bool`, `optional`): + Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for + more detail. + return_dict (:obj:`bool`, `optional`): + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. +""" + +TF_DPR_READER_INPUTS_DOCSTRING = r""" + Args: + input_ids: (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(n_passages, sequence_length)`): + Indices of input sequence tokens in the vocabulary. It has to be a sequence triplet with 1) the question + and 2) the passages titles and 3) the passages texts. To match pretraining, DPR :obj:`input_ids` sequence + should be formatted with [CLS] and [SEP] with the format: + + ``[CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>`` + + DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right + rather than the left. + + Indices can be obtained using :class:`~transformers.DPRReaderTokenizer`. See this class documentation for + more details. + attention_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(n_passages, sequence_length)`, `optional`): + Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + `What are attention masks? <../glossary.html#attention-mask>`__ + inputs_embeds (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(n_passages, sequence_length, hidden_size)`, `optional`): + Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. + This is useful if you want more control over how to convert :obj:`input_ids` indices into associated + vectors than the model's internal embedding lookup matrix. 
+ output_attentions (:obj:`bool`, `optional`): + Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned + tensors for more detail. + output_hidden_states (:obj:`bool`, `optional`): + Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for + more detail. + return_dict (:obj:`bool`, `optional`): + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. +""" + + +@add_start_docstrings( + "The bare DPRContextEncoder transformer outputting pooler outputs as context representations.", + TF_DPR_START_DOCSTRING, +) +class TFDPRContextEncoder(TFDPRPretrainedContextEncoder): + def __init__(self, config: DPRConfig, *args, **kwargs): + super().__init__(config, *args, **kwargs) + self.config = config + self.ctx_encoder = TFDPREncoder(config, name="ctx_encoder") + + def get_input_embeddings(self): + return self.ctx_encoder.bert_model.get_input_embeddings() + + @add_start_docstrings_to_model_forward(TF_DPR_ENCODERS_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=TFDPRContextEncoderOutput, config_class=_CONFIG_FOR_DOC) + def call( + self, + inputs, + attention_mask: Optional[Tensor] = None, + token_type_ids: Optional[Tensor] = None, + inputs_embeds: Optional[Tensor] = None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + training: bool = False, + ) -> Union[TFDPRContextEncoderOutput, Tuple[Tensor, ...]]: + r""" + Return: + + Examples:: + + >>> from transformers import TFDPRContextEncoder, DPRContextEncoderTokenizer + >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base') + >>> model = TFDPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', return_dict=True, from_pt=True) + >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"] + >>> embeddings = model(input_ids).pooler_output + """ + + if 
isinstance(inputs, (tuple, list)): + input_ids = inputs[0] + attention_mask = inputs[1] if len(inputs) > 1 else attention_mask + inputs_embeds = inputs[2] if len(inputs) > 2 else inputs_embeds + output_attentions = inputs[3] if len(inputs) > 3 else output_attentions + output_hidden_states = inputs[4] if len(inputs) > 4 else output_hidden_states + return_dict = inputs[5] if len(inputs) > 5 else return_dict + assert len(inputs) <= 6, "Too many inputs." + elif isinstance(inputs, (dict, BatchEncoding)): + input_ids = inputs.get("input_ids") + attention_mask = inputs.get("attention_mask", attention_mask) + inputs_embeds = inputs.get("inputs_embeds", inputs_embeds) + output_attentions = inputs.get("output_attentions", output_attentions) + output_hidden_states = inputs.get("output_hidden_states", output_hidden_states) + return_dict = inputs.get("return_dict", return_dict) + assert len(inputs) <= 6, "Too many inputs." + else: + input_ids = inputs + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") + elif input_ids is not None: + input_shape = shape_list(input_ids) + elif inputs_embeds is not None: + input_shape = shape_list(inputs_embeds)[:-1] + else: + raise ValueError("You have to specify either input_ids or inputs_embeds") + + if attention_mask is None: + attention_mask = ( + tf.ones(input_shape, dtype=tf.dtypes.int32) + if input_ids is None + else (input_ids != self.config.pad_token_id) + ) + if token_type_ids is None: + token_type_ids = tf.zeros(input_shape, dtype=tf.dtypes.int32) + + outputs = self.ctx_encoder( + input_ids, + 
@add_start_docstrings(
    "The bare DPRQuestionEncoder transformer outputting pooler outputs as question representations.",
    TF_DPR_START_DOCSTRING,
)
class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):
    def __init__(self, config: DPRConfig, *args, **kwargs):
        """Create the model and its ``TFDPREncoder`` sub-layer (named ``question_encoder``)."""
        super().__init__(config, *args, **kwargs)
        self.config = config
        self.question_encoder = TFDPREncoder(config, name="question_encoder")

    def get_input_embeddings(self):
        """Delegate to ``get_input_embeddings()`` of the encoder's ``bert_model``."""
        return self.question_encoder.bert_model.get_input_embeddings()

    @add_start_docstrings_to_model_forward(TF_DPR_ENCODERS_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=TFDPRQuestionEncoderOutput, config_class=_CONFIG_FOR_DOC)
    def call(
        self,
        inputs,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        training: bool = False,
    ) -> Union[TFDPRQuestionEncoderOutput, Tuple[Tensor, ...]]:
        r"""
        Return:

        Examples::

            >>> from transformers import TFDPRQuestionEncoder, DPRQuestionEncoderTokenizer
            >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
            >>> model = TFDPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', return_dict=True, from_pt=True)
            >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"]
            >>> embeddings = model(input_ids).pooler_output
        """
        # NOTE: the "Return:" line above must stay on its own line; it is the marker the
        # `replace_return_docstrings` decorator substitutes.

        # `inputs` may be a bare tensor, a positional list/tuple, or a dict-like mapping.
        if isinstance(inputs, (tuple, list)):
            # Positional layout is kept exactly as before for backward compatibility:
            # (input_ids, attention_mask, inputs_embeds, output_attentions,
            #  output_hidden_states, return_dict).
            # `token_type_ids` has no positional slot; pass it as a keyword or in a dict.
            input_ids = inputs[0]
            attention_mask = inputs[1] if len(inputs) > 1 else attention_mask
            inputs_embeds = inputs[2] if len(inputs) > 2 else inputs_embeds
            output_attentions = inputs[3] if len(inputs) > 3 else output_attentions
            output_hidden_states = inputs[4] if len(inputs) > 4 else output_hidden_states
            return_dict = inputs[5] if len(inputs) > 5 else return_dict
            assert len(inputs) <= 6, "Too many inputs."
        elif isinstance(inputs, (dict, BatchEncoding)):
            input_ids = inputs.get("input_ids")
            attention_mask = inputs.get("attention_mask", attention_mask)
            # Fix: segment ids supplied inside the mapping used to be dropped and were
            # then overwritten with zeros further down.
            token_type_ids = inputs.get("token_type_ids", token_type_ids)
            inputs_embeds = inputs.get("inputs_embeds", inputs_embeds)
            output_attentions = inputs.get("output_attentions", output_attentions)
            output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
            return_dict = inputs.get("return_dict", return_dict)
            # Seven keys are recognised now that `token_type_ids` is read as well.
            assert len(inputs) <= 7, "Too many inputs."
        else:
            input_ids = inputs

        # Explicit arguments win; otherwise fall back to the configuration defaults.
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # Exactly one of `input_ids` / `inputs_embeds` must be given; it fixes the mask shape.
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = shape_list(input_ids)
        elif inputs_embeds is not None:
            input_shape = shape_list(inputs_embeds)[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if attention_mask is None:
            # With token ids available, mask out positions equal to the pad token;
            # with embeddings only, attend to every position.
            attention_mask = (
                tf.ones(input_shape, dtype=tf.dtypes.int32)
                if input_ids is None
                else (input_ids != self.config.pad_token_id)
            )
        if token_type_ids is None:
            token_type_ids = tf.zeros(input_shape, dtype=tf.dtypes.int32)

        outputs = self.question_encoder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )

        if not return_dict:
            # Tuple mode: drop the first element of the encoder output.
            return outputs[1:]
        return TFDPRQuestionEncoderOutput(
            pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions
        )


@add_start_docstrings(
    "The bare DPRReader transformer outputting span predictions.",
    TF_DPR_START_DOCSTRING,
)
class TFDPRReader(TFDPRPretrainedReader):
    def __init__(self, config: DPRConfig, *args, **kwargs):
        """Create the model and its ``TFDPRSpanPredictor`` sub-layer (named ``span_predictor``)."""
        super().__init__(config, *args, **kwargs)
        self.config = config
        self.span_predictor = TFDPRSpanPredictor(config, name="span_predictor")

    def get_input_embeddings(self):
        """Delegate to ``get_input_embeddings()`` of the span predictor's encoder ``bert_model``."""
        return self.span_predictor.encoder.bert_model.get_input_embeddings()

    @add_start_docstrings_to_model_forward(TF_DPR_READER_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=TFDPRReaderOutput, config_class=_CONFIG_FOR_DOC)
    def call(
        self,
        inputs,
        attention_mask: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        training: bool = False,
    ) -> Union[TFDPRReaderOutput, Tuple[Tensor, ...]]:
        r"""
        Return:

        Examples::

            >>> from transformers import TFDPRReader, DPRReaderTokenizer
            >>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
            >>> model = TFDPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', return_dict=True, from_pt=True)
            >>> encoded_inputs = tokenizer(
            ...         questions=["What is love ?"],
            ...         titles=["Haddaway"],
            ...         texts=["'What Is Love' is a song recorded by the artist Haddaway"],
            ...         return_tensors='tf'
            ...     )
            >>> outputs = model(encoded_inputs)
            >>> start_logits = outputs.start_logits
            >>> end_logits = outputs.end_logits
            >>> relevance_logits = outputs.relevance_logits

        """
        # `inputs` may be a bare tensor, a positional list/tuple, or a dict-like mapping.
        if isinstance(inputs, (tuple, list)):
            # Positional layout mirrors the keyword order of this signature.
            input_ids = inputs[0]
            attention_mask = inputs[1] if len(inputs) > 1 else attention_mask
            inputs_embeds = inputs[2] if len(inputs) > 2 else inputs_embeds
            output_attentions = inputs[3] if len(inputs) > 3 else output_attentions
            output_hidden_states = inputs[4] if len(inputs) > 4 else output_hidden_states
            return_dict = inputs[5] if len(inputs) > 5 else return_dict
            assert len(inputs) <= 6, "Too many inputs."
        elif isinstance(inputs, (dict, BatchEncoding)):
            input_ids = inputs.get("input_ids")
            attention_mask = inputs.get("attention_mask", attention_mask)
            inputs_embeds = inputs.get("inputs_embeds", inputs_embeds)
            output_attentions = inputs.get("output_attentions", output_attentions)
            output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
            return_dict = inputs.get("return_dict", return_dict)
            assert len(inputs) <= 6, "Too many inputs."
        else:
            input_ids = inputs

        # Explicit arguments win; otherwise fall back to the configuration defaults.
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # Exactly one of `input_ids` / `inputs_embeds` must be given; it fixes the mask shape.
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = shape_list(input_ids)
        elif inputs_embeds is not None:
            input_shape = shape_list(inputs_embeds)[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if attention_mask is None:
            # Default: attend to every position.
            attention_mask = tf.ones(input_shape, dtype=tf.dtypes.int32)

        # The span predictor's result is returned unchanged.
        return self.span_predictor(
            input_ids,
            attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )
# Additions to src/transformers/utils/dummy_tf_objects.py.
# Each placeholder below only calls `requires_tf(self)` from its constructor.
# NOTE(review): this file looks generated (commit message mentions "run fix copies");
# confirm before hand-editing, hence the definitions are kept verbatim.
TF_DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST = None


TF_DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST = None


TF_DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST = None


class TFDPRContextEncoder:
    def __init__(self, *args, **kwargs):
        requires_tf(self)


class TFDPRPretrainedContextEncoder:
    def __init__(self, *args, **kwargs):
        requires_tf(self)


class TFDPRPretrainedQuestionEncoder:
    def __init__(self, *args, **kwargs):
        requires_tf(self)


class TFDPRPretrainedReader:
    def __init__(self, *args, **kwargs):
        requires_tf(self)


class TFDPRQuestionEncoder:
    def __init__(self, *args, **kwargs):
        requires_tf(self)


class TFDPRReader:
    def __init__(self, *args, **kwargs):
        requires_tf(self)


# Addition to tests/test_modeling_dpr.py.
@require_torch
class DPRModelIntegrationTest(unittest.TestCase):
    """Compare the pretrained PyTorch question encoder against stored reference values."""

    @slow
    def test_inference_no_head(self):
        """The first ten embedding values must match the reference slice within 1e-4."""
        model = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base", return_dict=False)
        model.to(torch_device)

        # [CLS] hello, is my dog cute? [SEP]
        token_ids = [[101, 7592, 1010, 2003, 2026, 3899, 10140, 1029, 102]]
        input_ids = torch.tensor(token_ids, dtype=torch.long, device=torch_device)

        # With return_dict=False the model returns a tuple; element 0 is the
        # embedding, shape = (1, 768).
        output = model(input_ids)[0]

        # Reference values for the first ten dimensions of that embedding.
        reference_values = [
            [
                0.03236253,
                0.12753335,
                0.16818509,
                0.00279786,
                0.3896933,
                0.24264945,
                0.2178971,
                -0.02335227,
                -0.08481959,
                -0.14324117,
            ]
        ]
        expected_slice = torch.tensor(reference_values, dtype=torch.float, device=torch_device)

        leading_values = output[:, :10]
        self.assertTrue(torch.allclose(leading_values, expected_slice, atol=1e-4))
import unittest

from transformers import is_tf_available
from transformers.testing_utils import require_tf, slow

from .test_configuration_common import ConfigTester
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor


if is_tf_available():
    import numpy
    import tensorflow as tf

    from transformers import (
        TF_DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
        TF_DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
        TF_DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST,
        BertConfig,
        DPRConfig,
        TFDPRContextEncoder,
        TFDPRQuestionEncoder,
        TFDPRReader,
    )


class TFDPRModelTester:
    """Build a small DPR configuration plus random inputs and run shape checks on the TF DPR models."""

    def __init__(
        self,
        parent,
        batch_size=13,
        seq_length=7,
        is_training=True,
        use_input_mask=True,
        use_token_type_ids=True,
        use_labels=True,
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=5,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=16,
        type_sequence_label_size=2,
        initializer_range=0.02,
        num_labels=3,
        num_choices=4,
        scope=None,
        projection_dim=0,
    ):
        """Store the hyper-parameters; ``parent`` is the ``unittest.TestCase`` used for assertions."""
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_input_mask = use_input_mask
        self.use_token_type_ids = use_token_type_ids
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.type_sequence_label_size = type_sequence_label_size
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.num_choices = num_choices
        self.scope = scope
        self.projection_dim = projection_dim

    def prepare_config_and_inputs(self):
        """Return ``(config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels)``.

        Optional tensors are ``None`` when the matching ``use_*`` flag is off.
        """
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor(
                [self.batch_size, self.seq_length], vocab_size=2
            )  # follow test_modeling_tf_ctrl.py

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = BertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            is_decoder=False,
            initializer_range=self.initializer_range,
            # Tuple outputs by default; individual checks opt in to return_dict=True.
            return_dict=False,
        )
        # The DPR config is built from the BERT settings plus the projection size.
        config = DPRConfig(projection_dim=self.projection_dim, **config.to_dict())

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

    def create_and_check_dpr_context_encoder(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Call the context encoder with several argument combinations and check the pooled shape."""
        model = TFDPRContextEncoder(config=config)
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        result = model(input_ids, token_type_ids=token_type_ids)
        # The config uses return_dict=False, so request a dict to read `.pooler_output`.
        result = model(input_ids, return_dict=True)
        self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.projection_dim or self.hidden_size))

    def create_and_check_dpr_question_encoder(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Call the question encoder with several argument combinations and check the pooled shape."""
        model = TFDPRQuestionEncoder(config=config)
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        result = model(input_ids, token_type_ids=token_type_ids)
        # The config uses return_dict=False, so request a dict to read `.pooler_output`.
        result = model(input_ids, return_dict=True)
        self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.projection_dim or self.hidden_size))

    def create_and_check_dpr_reader(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        """Call the reader once and check the shapes of its three logit outputs."""
        model = TFDPRReader(config=config)
        # The config uses return_dict=False, so request a dict to read the named logits.
        result = model(input_ids, attention_mask=input_mask, return_dict=True)

        self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertEqual(result.relevance_logits.shape, (self.batch_size,))

    def prepare_config_and_inputs_for_common(self):
        """Return ``(config, inputs_dict)`` where the dict holds only ``input_ids``."""
        config, input_ids, *_ = self.prepare_config_and_inputs()
        inputs_dict = {"input_ids": input_ids}
        return config, inputs_dict


@require_tf
class TFDPRModelTest(TFModelTesterMixin, unittest.TestCase):
    """Common-mixin tests plus DPR-specific shape checks for the three TF DPR models."""

    all_model_classes = (
        (
            TFDPRContextEncoder,
            TFDPRQuestionEncoder,
            TFDPRReader,
        )
        if is_tf_available()
        else ()
    )

    test_resize_embeddings = False
    test_missing_keys = False
    test_pruning = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = TFDPRModelTester(self)
        self.config_tester = ConfigTester(self, config_class=DPRConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_dpr_context_encoder_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_dpr_context_encoder(*config_and_inputs)

    def test_dpr_question_encoder_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_dpr_question_encoder(*config_and_inputs)

    def test_dpr_reader_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_dpr_reader(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        """Load the first checkpoint of each archive list with its model class (from PyTorch weights)."""
        # One entry per model family; the context-encoder checkpoint used to be
        # loaded twice by a duplicated loop.
        checkpoints = (
            (TF_DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, TFDPRContextEncoder),
            (TF_DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, TFDPRQuestionEncoder),
            (TF_DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST, TFDPRReader),
        )
        for archive_list, model_class in checkpoints:
            for model_name in archive_list[:1]:
                model = model_class.from_pretrained(model_name, from_pt=True)
                self.assertIsNotNone(model)


@require_tf
class TFDPRModelIntegrationTest(unittest.TestCase):
    """Compare the pretrained TF question encoder against stored reference values."""

    @slow
    def test_inference_no_head(self):
        """The first ten embedding values must match the reference slice within 1e-4."""
        model = TFDPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base", return_dict=False)

        input_ids = tf.constant(
            [[101, 7592, 1010, 2003, 2026, 3899, 10140, 1029, 102]]
        )  # [CLS] hello, is my dog cute? [SEP]
        output = model(input_ids)[0]  # embedding shape = (1, 768)
        # compare the actual values for a slice.
        expected_slice = tf.constant(
            [
                [
                    0.03236253,
                    0.12753335,
                    0.16818509,
                    0.00279786,
                    0.3896933,
                    0.24264945,
                    0.2178971,
                    -0.02335227,
                    -0.08481959,
                    -0.14324117,
                ]
            ]
        )
        self.assertTrue(numpy.allclose(output[:, :10].numpy(), expected_slice.numpy(), atol=1e-4))