Tapas tf (#13393)

* TF Tapas first commit * updated docs * updated logger message * updated pytorch weight conversion script to support scalar array * added use_cache to tapas model config to work properly with tf input_processing * 1. rm embeddings_sum 2. added # Copied 3. + TFTapasMLMHead 4. and lot other small fixes * updated docs * + test for tapas * updated testing_utils to check is_tensorflow_probability_available * converted model logits post processing using numpy to work with both PT and TF models * + TFAutoModelForTableQuestionAnswering * added TF support * added test for TFAutoModelForTableQuestionAnswering * added test for TFAutoModelForTableQuestionAnswering pipeline * updated auto model docs * fixed typo in import * added tensorflow_probability to run tests * updated MLM head * updated tapas.rst with TF model docs * fixed optimizer import in docs * updated convert to np data from pt model is not `transformers.tokenization_utils_base.BatchEncoding` after pipeline upgrade * updated pipeline: 1. with torch.no_gard removed, pipeline forward handles 2. token_type_ids converted to numpy * updated docs. * removed `use_cache` from config * removed floats_tensor * updated code comment * updated Copyright Year and logits_aggregation Optional * updated docs and comments * updated docstring * fixed model weight loading * make fixup * fix indentation * added tf slow pipeline test * pip upgrade * upgrade python to 3.7 * removed from_pt from tests * revert commit f18cfa9
2021-11-30 15:37:55 +05:30
parent 6fc38adff2
commit c468a87a69
19 changed files with 4324 additions and 75 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -65,7 +65,7 @@ jobs:
    run_tests_torch_and_tf:
        working_directory: ~/transformers
        docker:
-            - image: circleci/python:3.6
+            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
            RUN_PT_TF_CROSS_TESTS: yes
@@ -82,6 +82,7 @@ jobs:
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]
            - run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
+            - run: pip install tensorflow_probability
            - save_cache:
                key: v0.4-{{ checksum "setup.py" }}
                paths:
@@ -118,6 +119,7 @@ jobs:
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]
            - run: pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cpu.html
+            - run: pip install tensorflow_probability
            - save_cache:
                key: v0.4-{{ checksum "setup.py" }}
                paths:
@@ -278,6 +280,7 @@ jobs:
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]
+            - run: pip install tensorflow_probability
            - save_cache:
                  key: v0.4-tf-{{ checksum "setup.py" }}
                  paths:
@@ -311,6 +314,7 @@ jobs:
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]
+            - run: pip install tensorflow_probability
            - save_cache:
                  key: v0.4-tf-{{ checksum "setup.py" }}
                  paths:
@@ -468,6 +472,7 @@ jobs:
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
+            - run: pip install tensorflow_probability
            - save_cache:
                  key: v0.4-tf-{{ checksum "setup.py" }}
                  paths:
@@ -502,6 +507,7 @@ jobs:
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
+            - run: pip install tensorflow_probability
            - save_cache:
                  key: v0.4-tf-{{ checksum "setup.py" }}
                  paths:
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -499,7 +499,7 @@ Flax), PyTorch, and/or TensorFlow.
 +-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
 |             T5              |       ✅       |       ✅       |       ✅        |         ✅         |      ✅      |
 +-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
-|            TAPAS            |       ✅       |       ❌       |       ✅        |         ❌         |      ❌      |
+|            TAPAS            |       ✅       |       ❌       |       ✅        |         ✅         |      ❌      |
 +-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
 |       Transformer-XL        |       ✅       |       ❌       |       ✅        |         ✅         |      ❌      |
 +-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
--- a/docs/source/model_doc/auto.rst
+++ b/docs/source/model_doc/auto.rst
@@ -265,6 +265,13 @@ TFAutoModelForMultipleChoice
    :members:


+TFAutoModelForTableQuestionAnswering
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.TFAutoModelForTableQuestionAnswering
+    :members:
+
+
 TFAutoModelForTokenClassification
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

--- a/docs/source/model_doc/tapas.rst
+++ b/docs/source/model_doc/tapas.rst
@@ -49,7 +49,8 @@ entailment (a binary classification task). For more details, see their follow-up
 intermediate pre-training <https://www.aclweb.org/anthology/2020.findings-emnlp.27/>`__ by Julian Martin Eisenschlos,
 Syrine Krichene and Thomas Müller.

-This model was contributed by `nielsr <https://huggingface.co/nielsr>`__. The original code can be found `here
+This model was contributed by `nielsr <https://huggingface.co/nielsr>`__. The TensorFlow version of this model was
+contributed by `kamalkraj <https://huggingface.co/kamalkraj>`__. The original code can be found `here
 <https://github.com/google-research/tapas>`__.

 Tips:
@@ -130,6 +131,24 @@ for your environment):
        >>> config = TapasConfig('google-base-finetuned-wikisql-supervised')
        >>> model = TapasForQuestionAnswering.from_pretrained('google/tapas-base', config=config)

+In TensorFlow, this can be done as follows (make sure to have installed the `tensorflow_probability dependency
+<https://github.com/tensorflow/probability>`__ for your environment):
+
+.. code-block::
+
+        >>> from transformers import TapasConfig, TFTapasForQuestionAnswering
+
+        >>> # for example, the base sized model with default SQA configuration
+        >>> model = TFTapasForQuestionAnswering.from_pretrained('google/tapas-base')
+
+        >>> # or, the base sized model with WTQ configuration
+        >>> config = TapasConfig.from_pretrained('google/tapas-base-finetuned-wtq')
+        >>> model = TFTapasForQuestionAnswering.from_pretrained('google/tapas-base', config=config)
+
+        >>> # or, the base sized model with WikiSQL configuration
+        >>> config = TapasConfig('google-base-finetuned-wikisql-supervised')
+        >>> model = TFTapasForQuestionAnswering.from_pretrained('google/tapas-base', config=config)
+

 Of course, you don't necessarily have to follow one of these three ways in which TAPAS was fine-tuned. You can also
 experiment by defining any hyperparameters you want when initializing :class:`~transformers.TapasConfig`, and then
@@ -142,10 +161,21 @@ way. Here's an example:
        >>> from transformers import TapasConfig, TapasForQuestionAnswering

        >>> # you can initialize the classification heads any way you want (see docs of TapasConfig)
-        >>> config = TapasConfig(num_aggregation_labels=3, average_logits_per_cell=True, select_one_column=False)
+        >>> config = TapasConfig(num_aggregation_labels=3, average_logits_per_cell=True)
        >>> # initializing the pre-trained base sized model with our custom classification heads
        >>> model = TapasForQuestionAnswering.from_pretrained('google/tapas-base', config=config)

+And here is the equivalent code for TensorFlow:
+
+.. code-block::
+
+        >>> from transformers import TapasConfig, TFTapasForQuestionAnswering
+
+        >>> # you can initialize the classification heads any way you want (see docs of TapasConfig)
+        >>> config = TapasConfig(num_aggregation_labels=3, average_logits_per_cell=True)
+        >>> # initializing the pre-trained base sized model with our custom classification heads
+        >>> model = TFTapasForQuestionAnswering.from_pretrained('google/tapas-base', config=config)
+
 What you can also do is start from an already fine-tuned checkpoint. A note here is that the already fine-tuned
 checkpoint on WTQ has some issues due to the L2-loss which is somewhat brittle. See `here
 <https://github.com/google-research/tapas/issues/91#issuecomment-735719340>`__ for more info.
@@ -180,12 +210,13 @@ SQA format. The author explains this `here
 are not perfect (the ``answer_coordinates`` and ``float_answer`` fields are populated based on the ``answer_text``),
 meaning that WTQ and WikiSQL results could actually be improved.

-**STEP 3: Convert your data into PyTorch tensors using TapasTokenizer**
+**STEP 3: Convert your data into PyTorch/TensorFlow tensors using TapasTokenizer**

 Third, given that you've prepared your data in this TSV/CSV format (and corresponding CSV files containing the tabular
 data), you can then use :class:`~transformers.TapasTokenizer` to convert table-question pairs into :obj:`input_ids`,
 :obj:`attention_mask`, :obj:`token_type_ids` and so on. Again, based on which of the three cases you picked above,
-:class:`~transformers.TapasForQuestionAnswering` requires different inputs to be fine-tuned:
+:class:`~transformers.TapasForQuestionAnswering`/:class:`~transformers.TFTapasForQuestionAnswering` requires different
+inputs to be fine-tuned:

 +------------------------------------+----------------------------------------------------------------------------------------------+
 | **Task**                           | **Required inputs**                                                                          |
@@ -220,6 +251,8 @@ are already in the TSV file of step 2. Here's an example:
        {'input_ids': tensor([[ ... ]]), 'attention_mask': tensor([[...]]), 'token_type_ids': tensor([[[...]]]),
        'numeric_values': tensor([[ ... ]]), 'numeric_values_scale: tensor([[ ... ]]), labels: tensor([[ ... ]])}

+Set ``return_tensors='tf'`` when calling the tokenizer to prepare data for the TF models.
+
 Note that :class:`~transformers.TapasTokenizer` expects the data of the table to be **text-only**. You can use
 ``.astype(str)`` on a dataframe to turn it into text-only data. Of course, this only shows how to encode a single
 training example. It is advised to create a PyTorch dataset and a corresponding dataloader:
@@ -261,15 +294,67 @@ training example. It is advised to create a PyTorch dataset and a corresponding
        >>> train_dataset = TableDataset(data, tokenizer)
        >>> train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32)

+And here is the equivalent code for TensorFlow:
+
+.. code-block::
+
+        >>> import tensorflow as tf
+        >>> import pandas as pd
+
+        >>> tsv_path = "your_path_to_the_tsv_file"
+        >>> table_csv_path = "your_path_to_a_directory_containing_all_csv_files"
+
+        >>> class TableDataset:
+        ...     def __init__(self, data, tokenizer):
+        ...         self.data = data
+        ...         self.tokenizer = tokenizer
+        ...
+        ...     def __iter__(self):
+        ...         for idx in range(self.__len__()):
+        ...             item = self.data.iloc[idx]
+        ...             table = pd.read_csv(table_csv_path + item.table_file).astype(str) # be sure to make your table data text only
+        ...             encoding = self.tokenizer(table=table, 
+        ...                                   queries=item.question, 
+        ...                                   answer_coordinates=item.answer_coordinates, 
+        ...                                   answer_text=item.answer_text,
+        ...                                   truncation=True,
+        ...                                   padding="max_length",
+        ...                                   return_tensors="tf"
+        ...             )
+        ...             # remove the batch dimension which the tokenizer adds by default
+        ...             encoding = {key: tf.squeeze(val,0) for key, val in encoding.items()}
+        ...             # add the float_answer which is also required (weak supervision for aggregation case)
+        ...             encoding["float_answer"] = tf.convert_to_tensor(item.float_answer,dtype=tf.float32)
+        ...             yield encoding['input_ids'], encoding['attention_mask'], encoding['numeric_values'], \
+        ...                   encoding['numeric_values_scale'], encoding['token_type_ids'], encoding['labels'], \
+        ...                   encoding['float_answer']
+        ...
+        ...     def __len__(self):
+        ...        return len(self.data)
+
+        >>> data = pd.read_csv(tsv_path, sep='\t')
+        >>> train_dataset = TableDataset(data, tokenizer)
+        >>> output_signature = (
+        ... tf.TensorSpec(shape=(512,), dtype=tf.int32),
+        ... tf.TensorSpec(shape=(512,), dtype=tf.int32),
+        ... tf.TensorSpec(shape=(512,), dtype=tf.float32),
+        ... tf.TensorSpec(shape=(512,), dtype=tf.float32),
+        ... tf.TensorSpec(shape=(512,7), dtype=tf.int32),
+        ... tf.TensorSpec(shape=(512,), dtype=tf.int32),
+        ... tf.TensorSpec(shape=(), dtype=tf.float32))
+        >>> train_dataloader = tf.data.Dataset.from_generator(lambda: train_dataset, output_signature=output_signature).batch(32)
+
 Note that here, we encode each table-question pair independently. This is fine as long as your dataset is **not
 conversational**. In case your dataset involves conversational questions (such as in SQA), then you should first group
 together the ``queries``, ``answer_coordinates`` and ``answer_text`` per table (in the order of their ``position``
 index) and batch encode each table with its questions. This will make sure that the ``prev_labels`` token types (see
 docs of :class:`~transformers.TapasTokenizer`) are set correctly. See `this notebook
 <https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb>`__
-for more info.
+for more info. See `this notebook
+<https://github.com/kamalkraj/Tapas-Tutorial/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb>`__
+for more info regarding using the TensorFlow model.

-**STEP 4: Train (fine-tune) TapasForQuestionAnswering**
+**STEP 4: Train (fine-tune) TapasForQuestionAnswering/TFTapasForQuestionAnswering**

 You can then fine-tune :class:`~transformers.TapasForQuestionAnswering` using native PyTorch as follows (shown here for
 the weak supervision for aggregation case):
@@ -316,6 +401,52 @@ the weak supervision for aggregation case):
        ...         loss.backward()
        ...         optimizer.step()

+
+Equivalently, fine-tuning :class:`~transformers.TFTapasForQuestionAnswering` in native TensorFlow can be done as
+follows (shown here for the weak supervision for aggregation case):
+
+.. code-block::
+
+        >>> import tensorflow as tf
+        >>> from transformers import TapasConfig, TFTapasForQuestionAnswering
+
+        >>> # this is the default WTQ configuration
+        >>> config = TapasConfig(
+        ...            num_aggregation_labels = 4,
+        ...            use_answer_as_supervision = True,
+        ...            answer_loss_cutoff = 0.664694,
+        ...            cell_selection_preference = 0.207951,
+        ...            huber_loss_delta = 0.121194,
+        ...            init_cell_selection_weights_to_zero = True,
+        ...            select_one_column = True,
+        ...            allow_empty_column_selection = False,
+        ...            temperature = 0.0352513,
+        ... )
+        >>> model = TFTapasForQuestionAnswering.from_pretrained("google/tapas-base", config=config)
+
+        >>> optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
+
+        >>> for epoch in range(2):  # loop over the dataset multiple times
+        ...    for idx, batch in enumerate(train_dataloader):
+        ...         # get the inputs; 
+        ...         input_ids = batch[0]
+        ...         attention_mask = batch[1]
+        ...         token_type_ids = batch[4]
+        ...         labels = batch[5]
+        ...         numeric_values = batch[2]
+        ...         numeric_values_scale = batch[3]
+        ...         float_answer = batch[6]
+
+        ...         # forward + backward + optimize
+        ...         with tf.GradientTape() as tape:
+        ...              outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, 
+        ...                        labels=labels, numeric_values=numeric_values, numeric_values_scale=numeric_values_scale, 
+        ...                        float_answer=float_answer )
+        ...         grads = tape.gradient(outputs.loss, model.trainable_weights)
+        ...         optimizer.apply_gradients(zip(grads, model.trainable_weights))
+
+
+
 Usage: inference
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -380,10 +511,68 @@ of that:
        What is the total number of movies?
        Predicted answer: SUM > 87, 53, 69

+
+And here is the equivalent code for TensorFlow:
+
+.. code-block::
+
+        >>> from transformers import TapasTokenizer, TFTapasForQuestionAnswering
+        >>> import pandas as pd 
+
+        >>> model_name = 'google/tapas-base-finetuned-wtq'
+        >>> model = TFTapasForQuestionAnswering.from_pretrained(model_name)
+        >>> tokenizer = TapasTokenizer.from_pretrained(model_name)
+
+        >>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], 'Number of movies': ["87", "53", "69"]}
+        >>> queries = ["What is the name of the first actor?", "How many movies has George Clooney played in?", "What is the total number of movies?"]
+        >>> table = pd.DataFrame.from_dict(data)
+        >>> inputs = tokenizer(table=table, queries=queries, padding='max_length', return_tensors="tf") 
+        >>> outputs = model(**inputs)
+        >>> predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
+        ...         inputs, 
+        ...         outputs.logits, 
+        ...         outputs.logits_aggregation
+        ... )
+
+        >>> # let's print out the results:
+        >>> id2aggregation = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3:"COUNT"}
+        >>> aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices]
+
+        >>> answers = []
+        >>> for coordinates in predicted_answer_coordinates:
+        ...   if len(coordinates) == 1:
+        ...     # only a single cell:
+        ...     answers.append(table.iat[coordinates[0]])
+        ...   else:
+        ...     # multiple cells
+        ...     cell_values = []
+        ...     for coordinate in coordinates:
+        ...        cell_values.append(table.iat[coordinate])
+        ...     answers.append(", ".join(cell_values))
+
+        >>> display(table)
+        >>> print("")
+        >>> for query, answer, predicted_agg in zip(queries, answers, aggregation_predictions_string):
+        ...   print(query)
+        ...   if predicted_agg == "NONE":
+        ...     print("Predicted answer: " + answer)
+        ...   else:
+        ...     print("Predicted answer: " + predicted_agg + " > " + answer)    
+        What is the name of the first actor?
+        Predicted answer: Brad Pitt
+        How many movies has George Clooney played in?
+        Predicted answer: COUNT > 69
+        What is the total number of movies?
+        Predicted answer: SUM > 87, 53, 69
+
+
 In case of a conversational set-up, then each table-question pair must be provided **sequentially** to the model, such
 that the ``prev_labels`` token types can be overwritten by the predicted ``labels`` of the previous table-question
 pair. Again, more info can be found in `this notebook
-<https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb>`__.
+<https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb>`__
+(for PyTorch) and `this notebook
+<https://github.com/kamalkraj/Tapas-Tutorial/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb>`__
+(for TensorFlow).


 Tapas specific outputs
@@ -433,3 +622,31 @@ TapasForQuestionAnswering

 .. autoclass:: transformers.TapasForQuestionAnswering
    :members: forward
+
+
+TFTapasModel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.TFTapasModel
+    :members: call
+
+
+TFTapasForMaskedLM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.TFTapasForMaskedLM
+    :members: call
+
+
+TFTapasForSequenceClassification
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.TFTapasForSequenceClassification
+    :members: call
+
+
+TFTapasForQuestionAnswering
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.TFTapasForQuestionAnswering
+    :members: call
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -1446,6 +1446,7 @@ if is_tf_available():
            "TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING",
            "TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
            "TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
+            "TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING",
            "TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING",
            "TF_MODEL_MAPPING",
            "TF_MODEL_WITH_LM_HEAD_MAPPING",
@@ -1458,6 +1459,7 @@ if is_tf_available():
            "TFAutoModelForQuestionAnswering",
            "TFAutoModelForSeq2SeqLM",
            "TFAutoModelForSequenceClassification",
+            "TFAutoModelForTableQuestionAnswering",
            "TFAutoModelForTokenClassification",
            "TFAutoModelWithLMHead",
        ]
@@ -1767,6 +1769,16 @@ if is_tf_available():
            "TFT5PreTrainedModel",
        ]
    )
+    _import_structure["models.tapas"].extend(
+        [
+            "TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST",
+            "TFTapasForMaskedLM",
+            "TFTapasForQuestionAnswering",
+            "TFTapasForSequenceClassification",
+            "TFTapasModel",
+            "TFTapasPreTrainedModel",
+        ]
+    )
    _import_structure["models.transfo_xl"].extend(
        [
            "TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -3225,6 +3237,7 @@ if TYPE_CHECKING:
            TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
            TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
            TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+            TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
            TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
            TF_MODEL_MAPPING,
            TF_MODEL_WITH_LM_HEAD_MAPPING,
@@ -3237,6 +3250,7 @@ if TYPE_CHECKING:
            TFAutoModelForQuestionAnswering,
            TFAutoModelForSeq2SeqLM,
            TFAutoModelForSequenceClassification,
+            TFAutoModelForTableQuestionAnswering,
            TFAutoModelForTokenClassification,
            TFAutoModelWithLMHead,
        )
@@ -3483,6 +3497,14 @@ if TYPE_CHECKING:
            TFT5Model,
            TFT5PreTrainedModel,
        )
+        from .models.tapas import (
+            TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST,
+            TFTapasForMaskedLM,
+            TFTapasForQuestionAnswering,
+            TFTapasForSequenceClassification,
+            TFTapasModel,
+            TFTapasPreTrainedModel,
+        )
        from .models.transfo_xl import (
            TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST,
            TFAdaptiveEmbedding,
--- a/src/transformers/file_utils.py
+++ b/src/transformers/file_utils.py
@@ -213,6 +213,14 @@ except importlib_metadata.PackageNotFoundError:
    _soundfile_available = False


+_tensorflow_probability_available = importlib.util.find_spec("tensorflow_probability") is not None
+try:
+    _tensorflow_probability_version = importlib_metadata.version("tensorflow_probability")
+    logger.debug(f"Successfully imported tensorflow-probability version {_tensorflow_probability_version}")
+except importlib_metadata.PackageNotFoundError:
+    _tensorflow_probability_available = False
+
+
 _timm_available = importlib.util.find_spec("timm") is not None
 try:
    _timm_version = importlib_metadata.version("timm")
@@ -444,6 +452,10 @@ def is_pytorch_quantization_available():
    return _pytorch_quantization_available


+def is_tensorflow_probability_available():
+    return _tensorflow_probability_available
+
+
 def is_pandas_available():
    return importlib.util.find_spec("pandas") is not None

@@ -629,6 +641,12 @@ PYTORCH_QUANTIZATION_IMPORT_ERROR = """
 `pip install pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com`
 """

+# docstyle-ignore
+TENSORFLOW_PROBABILITY_IMPORT_ERROR = """
+{0} requires the tensorflow_probability library but it was not found in your environment. You can install it with pip as
+explained here: https://github.com/tensorflow/probability.
+"""
+

 # docstyle-ignore
 PANDAS_IMPORT_ERROR = """
@@ -684,6 +702,7 @@ BACKENDS_MAPPING = OrderedDict(
        ("sentencepiece", (is_sentencepiece_available, SENTENCEPIECE_IMPORT_ERROR)),
        ("sklearn", (is_sklearn_available, SKLEARN_IMPORT_ERROR)),
        ("speech", (is_speech_available, SPEECH_IMPORT_ERROR)),
+        ("tensorflow_probability", (is_tensorflow_probability_available, TENSORFLOW_PROBABILITY_IMPORT_ERROR)),
        ("tf", (is_tf_available, TENSORFLOW_IMPORT_ERROR)),
        ("timm", (is_timm_available, TIMM_IMPORT_ERROR)),
        ("tokenizers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)),
--- a/src/transformers/modeling_tf_pytorch_utils.py
+++ b/src/transformers/modeling_tf_pytorch_utils.py
@@ -399,7 +399,9 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F
            raise e

        # logger.warning(f"Initialize PyTorch weight {pt_weight_name}")
-
+        # Make sure we have a proper numpy array
+        if numpy.isscalar(array):
+            array = numpy.array(array)
        new_pt_params_dict[pt_weight_name] = torch.from_numpy(array)
        loaded_pt_weights_data_ptr[pt_weight.data_ptr()] = torch.from_numpy(array)
        all_tf_weights.discard(pt_weight_name)
--- a/src/transformers/models/auto/__init__.py
+++ b/src/transformers/models/auto/__init__.py
@@ -83,6 +83,7 @@ if is_tf_available():
        "TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING",
        "TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
        "TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
+        "TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING",
        "TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING",
        "TF_MODEL_MAPPING",
        "TF_MODEL_WITH_LM_HEAD_MAPPING",
@@ -95,6 +96,7 @@ if is_tf_available():
        "TFAutoModelForQuestionAnswering",
        "TFAutoModelForSeq2SeqLM",
        "TFAutoModelForSequenceClassification",
+        "TFAutoModelForTableQuestionAnswering",
        "TFAutoModelForTokenClassification",
        "TFAutoModelWithLMHead",
    ]
@@ -189,6 +191,7 @@ if TYPE_CHECKING:
            TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
            TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
            TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+            TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
            TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
            TF_MODEL_MAPPING,
            TF_MODEL_WITH_LM_HEAD_MAPPING,
@@ -201,6 +204,7 @@ if TYPE_CHECKING:
            TFAutoModelForQuestionAnswering,
            TFAutoModelForSeq2SeqLM,
            TFAutoModelForSequenceClassification,
+            TFAutoModelForTableQuestionAnswering,
            TFAutoModelForTokenClassification,
            TFAutoModelWithLMHead,
        )
--- a/src/transformers/models/auto/modeling_tf_auto.py
+++ b/src/transformers/models/auto/modeling_tf_auto.py
@@ -59,6 +59,7 @@ TF_MODEL_MAPPING_NAMES = OrderedDict(
        ("funnel", ("TFFunnelModel", "TFFunnelBaseModel")),
        ("dpr", "TFDPRQuestionEncoder"),
        ("mpnet", "TFMPNetModel"),
+        ("tapas", "TFTapasModel"),
        ("mbart", "TFMBartModel"),
        ("marian", "TFMarianModel"),
        ("pegasus", "TFPegasusModel"),
@@ -92,6 +93,7 @@ TF_MODEL_FOR_PRETRAINING_MAPPING_NAMES = OrderedDict(
        ("xlm", "TFXLMWithLMHeadModel"),
        ("ctrl", "TFCTRLLMHeadModel"),
        ("electra", "TFElectraForPreTraining"),
+        ("tapas", "TFTapasForMaskedLM"),
        ("funnel", "TFFunnelForPreTraining"),
        ("mpnet", "TFMPNetForMaskedLM"),
    ]
@@ -124,6 +126,7 @@ TF_MODEL_WITH_LM_HEAD_MAPPING_NAMES = OrderedDict(
        ("xlm", "TFXLMWithLMHeadModel"),
        ("ctrl", "TFCTRLLMHeadModel"),
        ("electra", "TFElectraForMaskedLM"),
+        ("tapas", "TFTapasForMaskedLM"),
        ("funnel", "TFFunnelForMaskedLM"),
        ("mpnet", "TFMPNetForMaskedLM"),
    ]
@@ -172,6 +175,7 @@ TF_MODEL_FOR_MASKED_LM_MAPPING_NAMES = OrderedDict(
        ("flaubert", "TFFlaubertWithLMHeadModel"),
        ("xlm", "TFXLMWithLMHeadModel"),
        ("electra", "TFElectraForMaskedLM"),
+        ("tapas", "TFTapasForMaskedLM"),
        ("funnel", "TFFunnelForMaskedLM"),
        ("mpnet", "TFMPNetForMaskedLM"),
    ]
@@ -215,6 +219,7 @@ TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
        ("flaubert", "TFFlaubertForSequenceClassification"),
        ("xlm", "TFXLMForSequenceClassification"),
        ("electra", "TFElectraForSequenceClassification"),
+        ("tapas", "TFTapasForSequenceClassification"),
        ("funnel", "TFFunnelForSequenceClassification"),
        ("gpt2", "TFGPT2ForSequenceClassification"),
        ("mpnet", "TFMPNetForSequenceClassification"),
@@ -249,6 +254,14 @@ TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict(
    ]
 )

+TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict(
+    [
+        # Model for Table Question Answering mapping
+        ("tapas", "TFTapasForQuestionAnswering"),
+    ]
+)
+
+
 TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
    [
        # Model for Token Classification mapping
@@ -323,6 +336,9 @@ TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = _LazyAutoMapping(
 TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
    CONFIG_MAPPING_NAMES, TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
 )
+TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
+    CONFIG_MAPPING_NAMES, TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES
+)
 TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
    CONFIG_MAPPING_NAMES, TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
 )
@@ -402,6 +418,17 @@ class TFAutoModelForQuestionAnswering(_BaseAutoModelClass):
 TFAutoModelForQuestionAnswering = auto_class_update(TFAutoModelForQuestionAnswering, head_doc="question answering")


+class TFAutoModelForTableQuestionAnswering(_BaseAutoModelClass):
+    _model_mapping = TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
+
+
+TFAutoModelForTableQuestionAnswering = auto_class_update(
+    TFAutoModelForTableQuestionAnswering,
+    head_doc="table question answering",
+    checkpoint_for_example="google/tapas-base-finetuned-wtq",
+)
+
+
 class TFAutoModelForTokenClassification(_BaseAutoModelClass):
    _model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING

--- a/src/transformers/models/tapas/__init__.py
+++ b/src/transformers/models/tapas/__init__.py
@@ -18,7 +18,7 @@

 from typing import TYPE_CHECKING

-from ...file_utils import _LazyModule, is_torch_available
+from ...file_utils import _LazyModule, is_tf_available, is_torch_available


 _import_structure = {
@@ -36,6 +36,15 @@ if is_torch_available():
        "TapasPreTrainedModel",
        "load_tf_weights_in_tapas",
    ]
+if is_tf_available():
+    _import_structure["modeling_tf_tapas"] = [
+        "TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST",
+        "TFTapasForMaskedLM",
+        "TFTapasForQuestionAnswering",
+        "TFTapasForSequenceClassification",
+        "TFTapasModel",
+        "TFTapasPreTrainedModel",
+    ]


 if TYPE_CHECKING:
@@ -53,6 +62,17 @@ if TYPE_CHECKING:
            load_tf_weights_in_tapas,
        )

+    if is_tf_available():
+        from .modeling_tf_tapas import (
+            TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST,
+            TFTapasForMaskedLM,
+            TFTapasForQuestionAnswering,
+            TFTapasForSequenceClassification,
+            TFTapasModel,
+            TFTapasPreTrainedModel,
+        )
+
+
 else:
    import sys

--- a/src/transformers/models/tapas/modeling_tf_tapas.py
+++ b/src/transformers/models/tapas/modeling_tf_tapas.py
--- a/src/transformers/models/tapas/tokenization_tapas.py
+++ b/src/transformers/models/tapas/tokenization_tapas.py
@@ -1897,9 +1897,9 @@ class TapasTokenizer(PreTrainedTokenizer):
            data (:obj:`dict`):
                Dictionary mapping features to actual values. Should be created using
                :class:`~transformers.TapasTokenizer`.
-            logits (:obj:`np.ndarray` of shape ``(batch_size, sequence_length)``):
+            logits (:obj:`torch.Tensor` or :obj:`tf.Tensor` of shape ``(batch_size, sequence_length)``):
                Tensor containing the logits at the token level.
-            logits_agg (:obj:`np.ndarray` of shape ``(batch_size, num_aggregation_labels)``, `optional`):
+            logits_agg (:obj:`torch.Tensor` or :obj:`tf.Tensor` of shape ``(batch_size, num_aggregation_labels)``, `optional`):
                Tensor containing the aggregation logits.
            cell_classification_threshold (:obj:`float`, `optional`, defaults to 0.5):
                Threshold to be used for cell selection. All table cells for which their probability is larger than
@@ -1915,6 +1915,11 @@ class TapasTokenizer(PreTrainedTokenizer):
            - predicted_aggregation_indices (``List[int]``of length ``batch_size``, `optional`, returned when
              ``logits_aggregation`` is provided): Predicted aggregation operator indices of the aggregation head.
        """
+        # converting to numpy arrays to work with PT/TF
+        logits = logits.numpy()
+        if logits_agg is not None:
+            logits_agg = logits_agg.numpy()
+        data = {key: value.numpy() for key, value in data.items() if key != "training"}
        # input data is of type float32
        # np.log(np.finfo(np.float32).max) = 88.72284
        # Any value over 88.72284 will overflow when passed through the exponential, sending a warning
@@ -1975,7 +1980,7 @@ class TapasTokenizer(PreTrainedTokenizer):
        output = (predicted_answer_coordinates,)

        if logits_agg is not None:
-            predicted_aggregation_indices = logits_agg.argmax(dim=-1)
+            predicted_aggregation_indices = logits_agg.argmax(axis=-1)
            output = (predicted_answer_coordinates, predicted_aggregation_indices.tolist())

        return output
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -78,6 +78,7 @@ if is_tf_available():
        TFAutoModelForQuestionAnswering,
        TFAutoModelForSeq2SeqLM,
        TFAutoModelForSequenceClassification,
+        TFAutoModelForTableQuestionAnswering,
        TFAutoModelForTokenClassification,
    )

@@ -170,7 +171,7 @@ SUPPORTED_TASKS = {
    "table-question-answering": {
        "impl": TableQuestionAnsweringPipeline,
        "pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (),
-        "tf": (),
+        "tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (),
        "default": {
            "model": {
                "pt": "google/tapas-base-finetuned-wtq",
--- a/src/transformers/pipelines/table_question_answering.py
+++ b/src/transformers/pipelines/table_question_answering.py
@@ -2,7 +2,13 @@ import collections

 import numpy as np

-from ..file_utils import add_end_docstrings, is_torch_available, requires_backends
+from ..file_utils import (
+    add_end_docstrings,
+    is_tensorflow_probability_available,
+    is_tf_available,
+    is_torch_available,
+    requires_backends,
+)
 from .base import PIPELINE_INIT_ARGS, ArgumentHandler, Pipeline, PipelineException


@@ -11,6 +17,13 @@ if is_torch_available():

    from ..models.auto.modeling_auto import MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING

+if is_tf_available() and is_tensorflow_probability_available():
+    import tensorflow as tf
+
+    import tensorflow_probability as tfp
+
+    from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
+

 class TableQuestionAnsweringArgumentHandler(ArgumentHandler):
    """
@@ -83,10 +96,11 @@ class TableQuestionAnsweringPipeline(Pipeline):
        super().__init__(*args, **kwargs)
        self._args_parser = args_parser

-        if self.framework == "tf":
-            raise ValueError("The TableQuestionAnsweringPipeline is only available in PyTorch.")
-
-        self.check_model_type(MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING)
+        self.check_model_type(
+            TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
+            if self.framework == "tf"
+            else MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
+        )

        self.aggregate = bool(getattr(self.model.config, "aggregation_labels")) and bool(
            getattr(self.model.config, "num_aggregation_labels")
@@ -100,6 +114,7 @@ class TableQuestionAnsweringPipeline(Pipeline):
        Inference used for models that need to process sequences in a sequential fashion, like the SQA models which
        handle conversational query related to a table.
        """
+        if self.framework == "pt":
            all_logits = []
            all_aggregations = []
            prev_answers = None
@@ -161,6 +176,67 @@ class TableQuestionAnsweringPipeline(Pipeline):
            logits_batch = torch.cat(tuple(all_logits), 0)

            return (logits_batch,) if not self.aggregate else (logits_batch, torch.cat(tuple(all_aggregations), 0))
+        else:
+            all_logits = []
+            all_aggregations = []
+            prev_answers = None
+            batch_size = inputs["input_ids"].shape[0]
+
+            input_ids = inputs["input_ids"]
+            attention_mask = inputs["attention_mask"]
+            token_type_ids = inputs["token_type_ids"].numpy()
+            token_type_ids_example = None
+
+            for index in range(batch_size):
+                # If sequences have already been processed, the token type IDs will be created according to the previous
+                # answer.
+                if prev_answers is not None:
+                    prev_labels_example = token_type_ids_example[:, 3]  # shape (seq_len,)
+                    model_labels = np.zeros_like(prev_labels_example, dtype=np.int32)  # shape (seq_len,)
+
+                    token_type_ids_example = token_type_ids[index]  # shape (seq_len, 7)
+                    # Convert each id column to a Python list once instead of once per token.
+                    segment_ids = token_type_ids_example[:, 0].tolist()
+                    column_ids = token_type_ids_example[:, 1].tolist()
+                    row_ids = token_type_ids_example[:, 2].tolist()
+                    for i, (segment_id, col_id, row_id) in enumerate(zip(segment_ids, column_ids, row_ids)):
+                        if row_id >= 1 and col_id >= 1 and segment_id == 1:
+                            model_labels[i] = int(prev_answers[(col_id - 1, row_id - 1)])
+
+                    token_type_ids_example[:, 3] = model_labels
+
+                input_ids_example = input_ids[index]
+                attention_mask_example = attention_mask[index]  # shape (seq_len,)
+                token_type_ids_example = token_type_ids[index]  # shape (seq_len, 7)
+                outputs = self.model(
+                    input_ids=np.expand_dims(input_ids_example, axis=0),
+                    attention_mask=np.expand_dims(attention_mask_example, axis=0),
+                    token_type_ids=np.expand_dims(token_type_ids_example, axis=0),
+                )
+                logits = outputs.logits
+
+                if self.aggregate:
+                    all_aggregations.append(outputs.logits_aggregation)
+
+                all_logits.append(logits)
+
+                dist_per_token = tfp.distributions.Bernoulli(logits=logits)
+                probabilities = dist_per_token.probs_parameter() * tf.cast(attention_mask_example, tf.float32)
+
+                coords_to_probs = collections.defaultdict(list)
+                token_probs = tf.squeeze(probabilities).numpy().tolist()
+                segment_ids = token_type_ids_example[:, 0].tolist()
+                column_ids = token_type_ids_example[:, 1].tolist()
+                row_ids = token_type_ids_example[:, 2].tolist()
+                for p, segment_id, col, row in zip(token_probs, segment_ids, column_ids, row_ids):
+                    if col >= 1 and row >= 1 and segment_id == 1:
+                        coords_to_probs[(col - 1, row - 1)].append(p)
+
+                prev_answers = {key: np.array(coords_to_probs[key]).mean() > 0.5 for key in coords_to_probs}
+
+            logits_batch = tf.concat(tuple(all_logits), 0)
+
+            return (logits_batch,) if not self.aggregate else (logits_batch, tf.concat(tuple(all_aggregations), 0))

    def __call__(self, *args, **kwargs):
        r"""
@@ -274,7 +350,7 @@ class TableQuestionAnsweringPipeline(Pipeline):
        outputs = model_outputs["outputs"]
        if self.aggregate:
            logits, logits_agg = outputs[:2]
-            predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits.detach(), logits_agg)
+            predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits, logits_agg)
            answer_coordinates_batch, agg_predictions = predictions
            aggregators = {i: self.model.config.aggregation_labels[pred] for i, pred in enumerate(agg_predictions)}

@@ -284,7 +360,7 @@ class TableQuestionAnsweringPipeline(Pipeline):
            }
        else:
            logits = outputs[0]
-            predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits.detach())
+            predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits)
            answer_coordinates_batch = predictions[0]
            aggregators = {}
            aggregators_prefix = {}
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -44,6 +44,7 @@ from .file_utils import (
    is_scatter_available,
    is_sentencepiece_available,
    is_soundfile_availble,
+    is_tensorflow_probability_available,
    is_tf_available,
    is_timm_available,
    is_tokenizers_available,
@@ -292,6 +293,19 @@ def require_torch_scatter(test_case):
        return test_case


+def require_tensorflow_probability(test_case):
+    """
+    Decorator for tests that depend on the TensorFlow Probability package.
+
+    When TensorFlow Probability isn't installed the test is skipped; otherwise it is returned untouched.
+    """
+    if is_tensorflow_probability_available():
+        return test_case
+
+    skip_marker = unittest.skip("test requires TensorFlow probability")
+    return skip_marker(test_case)
+
+
 def require_torchaudio(test_case):
    """
    Decorator marking a test that requires torchaudio. These tests are skipped when torchaudio isn't installed.
--- a/src/transformers/utils/dummy_tf_objects.py
+++ b/src/transformers/utils/dummy_tf_objects.py
@@ -239,6 +239,9 @@ TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = None
 TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = None


+TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = None
+
+
 TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = None


@@ -356,6 +359,18 @@ class TFAutoModelForSequenceClassification:
        requires_backends(self, ["tf"])


+class TFAutoModelForTableQuestionAnswering:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["tf"])
+
+    def call(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
 class TFAutoModelForTokenClassification:
    def __init__(self, *args, **kwargs):
        requires_backends(self, ["tf"])
@@ -2488,6 +2503,69 @@ class TFT5PreTrainedModel:
        requires_backends(self, ["tf"])


+TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST = None
+
+
+class TFTapasForMaskedLM:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["tf"])
+
+    def call(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
+class TFTapasForQuestionAnswering:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["tf"])
+
+    def call(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
+class TFTapasForSequenceClassification:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["tf"])
+
+    def call(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
+class TFTapasModel:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["tf"])
+
+    def call(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
+class TFTapasPreTrainedModel:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["tf"])
+
+    def call(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
 TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = None


--- a/tests/test_modeling_tf_auto.py
+++ b/tests/test_modeling_tf_auto.py
@@ -17,8 +17,14 @@ import copy
 import tempfile
 import unittest

-from transformers import CONFIG_MAPPING, AutoConfig, BertConfig, GPT2Config, T5Config, is_tf_available
-from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER, SMALL_MODEL_IDENTIFIER, require_tf, slow
+from transformers import CONFIG_MAPPING, AutoConfig, BertConfig, GPT2Config, T5Config, TapasConfig, is_tf_available
+from transformers.testing_utils import (
+    DUMMY_UNKNOWN_IDENTIFIER,
+    SMALL_MODEL_IDENTIFIER,
+    require_tensorflow_probability,
+    require_tf,
+    slow,
+)

 from .test_modeling_bert import BertModelTester

@@ -32,6 +38,7 @@ if is_tf_available():
        TFAutoModelForQuestionAnswering,
        TFAutoModelForSeq2SeqLM,
        TFAutoModelForSequenceClassification,
+        TFAutoModelForTableQuestionAnswering,
        TFAutoModelForTokenClassification,
        TFAutoModelWithLMHead,
        TFBertForMaskedLM,
@@ -44,6 +51,7 @@ if is_tf_available():
        TFGPT2LMHeadModel,
        TFRobertaForMaskedLM,
        TFT5ForConditionalGeneration,
+        TFTapasForQuestionAnswering,
    )
    from transformers.models.auto.modeling_tf_auto import (
        TF_MODEL_FOR_CAUSAL_LM_MAPPING,
@@ -52,6 +60,7 @@ if is_tf_available():
        TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+        TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        TF_MODEL_MAPPING,
        TF_MODEL_WITH_LM_HEAD_MAPPING,
@@ -59,6 +68,7 @@ if is_tf_available():
    from transformers.models.bert.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST
    from transformers.models.gpt2.modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
    from transformers.models.t5.modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST
+    from transformers.models.tapas.modeling_tf_tapas import TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST


 class NewModelConfig(BertConfig):
@@ -176,6 +186,21 @@ class TFAutoModelTest(unittest.TestCase):
            self.assertIsNotNone(model)
            self.assertIsInstance(model, TFBertForQuestionAnswering)

+    @slow
+    @require_tensorflow_probability
+    def test_table_question_answering_model_from_pretrained(self):
+        for model_name in TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST[5:6]:
+            config = AutoConfig.from_pretrained(model_name)
+            self.assertIsNotNone(config)
+            self.assertIsInstance(config, TapasConfig)
+
+            model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_name)
+            model, loading_info = TFAutoModelForTableQuestionAnswering.from_pretrained(
+                model_name, output_loading_info=True
+            )
+            self.assertIsNotNone(model)
+            self.assertIsInstance(model, TFTapasForQuestionAnswering)
+
    def test_from_pretrained_identifier(self):
        model = TFAutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER)
        self.assertIsInstance(model, TFBertForMaskedLM)
@@ -210,6 +235,7 @@ class TFAutoModelTest(unittest.TestCase):
            TF_MODEL_MAPPING,
            TF_MODEL_FOR_PRETRAINING_MAPPING,
            TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
+            TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
            TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
            TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
            TF_MODEL_WITH_LM_HEAD_MAPPING,
--- a/tests/test_modeling_tf_tapas.py
+++ b/tests/test_modeling_tf_tapas.py
--- a/tests/test_pipelines_table_question_answering.py
+++ b/tests/test_pipelines_table_question_answering.py
@@ -19,11 +19,13 @@ from transformers import (
    AutoModelForTableQuestionAnswering,
    AutoTokenizer,
    TableQuestionAnsweringPipeline,
+    TFAutoModelForTableQuestionAnswering,
    pipeline,
 )
 from transformers.testing_utils import (
    is_pipeline_test,
    require_pandas,
+    require_tensorflow_probability,
    require_tf,
    require_torch,
    require_torch_scatter,
@@ -33,6 +35,7 @@ from transformers.testing_utils import (
 from .test_pipelines_common import PipelineTestCaseMeta


+@require_tensorflow_probability
@require_torch_scatter
@require_torch
@require_pandas
@@ -43,9 +46,105 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING

    @require_tf
-    @unittest.skip("Table question answering not implemented in TF")
    def test_small_model_tf(self):
-        pass
+        model_id = "lysandre/tiny-tapas-random-wtq"
+        model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.assertIsInstance(model.config.aggregation_labels, dict)
+        self.assertIsInstance(model.config.no_aggregation_label_index, int)
+
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query="how many movies has george clooney played in?",
+        )
+        self.assertEqual(
+            outputs,
+            {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+        )
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+            ],
+        )
+        outputs = table_querier(
+            table={
+                "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                "Stars": ["36542", "4512", "3934"],
+                "Contributors": ["651", "77", "34"],
+                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+            },
+            query=[
+                "What repository has the largest number of stars?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "What is the number of repositories?",
+                "What is the average number of stars?",
+                "What is the total amount of stars?",
+            ],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table=None)
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table="")
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table={})
+        with self.assertRaises(ValueError):
+            table_querier(
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                }
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query="",
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query=None,
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )

    @require_torch
    def test_small_model_pt(self):
@@ -148,7 +247,8 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
                },
            )

-    def test_slow_tokenizer_sqa(self):
+    @require_torch
+    def test_slow_tokenizer_sqa_pt(self):
        model_id = "lysandre/tiny-tapas-random-sqa"
        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -265,8 +365,126 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
                },
            )

+    @require_tf
+    def test_slow_tokenizer_sqa_tf(self):
+        model_id = "lysandre/tiny-tapas-random-sqa"
+        model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+
+        inputs = {
+            "table": {
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            "query": ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        }
+        sequential_outputs = table_querier(**inputs, sequential=True)
+        batch_outputs = table_querier(**inputs, sequential=False)
+
+        self.assertEqual(len(sequential_outputs), 3)
+        self.assertEqual(len(batch_outputs), 3)
+        self.assertEqual(sequential_outputs[0], batch_outputs[0])
+        self.assertNotEqual(sequential_outputs[1], batch_outputs[1])
+        # self.assertNotEqual(sequential_outputs[2], batch_outputs[2])
+
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query="how many movies has george clooney played in?",
+        )
+        self.assertEqual(
+            outputs,
+            {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+        )
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+            ],
+        )
+        outputs = table_querier(
+            table={
+                "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                "Stars": ["36542", "4512", "3934"],
+                "Contributors": ["651", "77", "34"],
+                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+            },
+            query=[
+                "What repository has the largest number of stars?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "What is the number of repositories?",
+                "What is the average number of stars?",
+                "What is the total amount of stars?",
+            ],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table=None)
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table="")
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table={})
+        with self.assertRaises(ValueError):
+            table_querier(
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                }
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query="",
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query=None,
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+
    @slow
-    def test_integration_wtq(self):
+    def test_integration_wtq_pt(self):
        table_querier = pipeline("table-question-answering")

        data = {
@@ -310,7 +528,54 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
        self.assertListEqual(results, expected_results)

    @slow
-    def test_integration_sqa(self):
+    def test_integration_wtq_tf(self):
+        """Run the TF WTQ checkpoint through the pipeline and compare cell selection and aggregators."""
+        checkpoint = "google/tapas-base-finetuned-wtq"
+        tf_model = TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
+        tapas_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+        tqa = pipeline("table-question-answering", model=tf_model, tokenizer=tapas_tokenizer)
+
+        table = {
+            "Repository": ["Transformers", "Datasets", "Tokenizers"],
+            "Stars": ["36542", "4512", "3934"],
+            "Contributors": ["651", "77", "34"],
+            "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+        }
+        questions = [
+            "What repository has the largest number of stars?",
+            "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+            "What is the number of repositories?",
+            "What is the average number of stars?",
+            "What is the total amount of stars?",
+        ]
+        # One result dict is expected per question, in the same order as `questions`.
+        answers = tqa(table, questions)
+        expected = [
+            {"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
+            {"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
+            {
+                "answer": "COUNT > Transformers, Datasets, Tokenizers",
+                "coordinates": [(0, 0), (1, 0), (2, 0)],
+                "cells": ["Transformers", "Datasets", "Tokenizers"],
+                "aggregator": "COUNT",
+            },
+            {
+                "answer": "AVERAGE > 36542, 4512, 3934",
+                "coordinates": [(0, 1), (1, 1), (2, 1)],
+                "cells": ["36542", "4512", "3934"],
+                "aggregator": "AVERAGE",
+            },
+            {
+                "answer": "SUM > 36542, 4512, 3934",
+                "coordinates": [(0, 1), (1, 1), (2, 1)],
+                "cells": ["36542", "4512", "3934"],
+                "aggregator": "SUM",
+            },
+        ]
+        self.assertListEqual(answers, expected)
+
+    @slow
+    def test_integration_sqa_pt(self):
        table_querier = pipeline(
            "table-question-answering",
            model="google/tapas-base-finetuned-sqa",
@@ -331,3 +596,29 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
            {"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]},
        ]
        self.assertListEqual(results, expected_results)
+
+    @slow
+    def test_integration_sqa_tf(self):
+        """Run the TF SQA checkpoint through the pipeline on a chain of follow-up questions."""
+        checkpoint = "google/tapas-base-finetuned-sqa"
+        tf_model = TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
+        tapas_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+        tqa = pipeline("table-question-answering", model=tf_model, tokenizer=tapas_tokenizer)
+
+        table = {
+            "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
+            "Age": ["56", "45", "59"],
+            "Number of movies": ["87", "53", "69"],
+            "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+        }
+        # The second and third questions say "he"/"his" and only make sense after the first one;
+        # presumably that is why the pipeline is called with `sequential=True` -- confirm against the pipeline docs.
+        questions = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
+        answers = tqa(table, questions, sequential=True)
+
+        expected = [
+            {"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]},
+            {"answer": "59", "coordinates": [(2, 1)], "cells": ["59"]},
+            {"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]},
+        ]
+        self.assertListEqual(answers, expected)