From d415882b41e61c962df5c2c8086ec83fa4b71629 Mon Sep 17 00:00:00 2001
From: Lysandre Debut <lysandre@huggingface.co>
Date: Mon, 11 Jan 2021 08:02:41 -0500
Subject: [PATCH] Remove tolerance + drop_rows_to_fit by default (#9507)

* Remove tolerance + drop_rows_to_fit by default

* Remove the drop_rows_to_fit argument (row dropping now follows `truncation`)
---
 .../models/tapas/tokenization_tapas.py        |  9 ++-----
 tests/test_modeling_tapas.py                  | 27 +++++++++----------
 tests/test_tokenization_tapas.py              |  2 +-
 3 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/src/transformers/models/tapas/tokenization_tapas.py b/src/transformers/models/tapas/tokenization_tapas.py
index 7498b2a35e..4227ada6eb 100644
--- a/src/transformers/models/tapas/tokenization_tapas.py
+++ b/src/transformers/models/tapas/tokenization_tapas.py
@@ -255,7 +255,7 @@ class TapasTokenizer(PreTrainedTokenizer):
             value for :obj:`lowercase` (as in the original BERT).
         cell_trim_length (:obj:`int`, `optional`, defaults to -1):
             If > 0: Trim cells so that the length is <= this value. Also disables further cell trimming, should thus be
-            used with 'drop_rows_to_fit' below.
+            used with :obj:`truncation` set to :obj:`True`.
         max_column_id (:obj:`int`, `optional`):
             Max column id to extract.
         max_row_id (:obj:`int`, `optional`):
@@ -264,8 +264,6 @@ class TapasTokenizer(PreTrainedTokenizer):
             Whether to add empty strings instead of column names.
         update_answer_coordinates (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether to recompute the answer coordinates from the answer text.
-        drop_rows_to_fit (:obj:`bool`, `optional`, defaults to :obj:`False`):
-            Whether to drop the last rows if a table doesn't fit within max sequence length.
 
     """
 
@@ -292,7 +290,6 @@ class TapasTokenizer(PreTrainedTokenizer):
         max_row_id: int = None,
         strip_column_names: bool = False,
         update_answer_coordinates: bool = False,
-        drop_rows_to_fit: bool = False,
         model_max_length: int = 512,
         additional_special_tokens: Optional[List[str]] = None,
         **kwargs
@@ -323,7 +320,6 @@ class TapasTokenizer(PreTrainedTokenizer):
             max_row_id=max_row_id,
             strip_column_names=strip_column_names,
             update_answer_coordinates=update_answer_coordinates,
-            drop_rows_to_fit=drop_rows_to_fit,
             model_max_length=model_max_length,
             additional_special_tokens=additional_special_tokens,
             **kwargs,
@@ -352,7 +348,6 @@ class TapasTokenizer(PreTrainedTokenizer):
         self.max_row_id = max_row_id if max_row_id is not None else self.model_max_length
         self.strip_column_names = strip_column_names
         self.update_answer_coordinates = update_answer_coordinates
-        self.drop_rows_to_fit = drop_rows_to_fit
 
     @property
     def do_lower_case(self):
@@ -1122,7 +1117,7 @@ class TapasTokenizer(PreTrainedTokenizer):
             prev_answer_coordinates = kwargs["prev_answer_coordinates"]
             prev_answer_text = kwargs["prev_answer_text"]
 
-        num_rows = self._get_num_rows(raw_table, self.drop_rows_to_fit)
+        num_rows = self._get_num_rows(raw_table, truncation != TapasTruncationStrategy.DO_NOT_TRUNCATE)
         num_columns = self._get_num_columns(raw_table)
         _, _, num_tokens = self._get_table_boundaries(tokenized_table)
 
diff --git a/tests/test_modeling_tapas.py b/tests/test_modeling_tapas.py
index 6897d683c5..b4f8f13231 100644
--- a/tests/test_modeling_tapas.py
+++ b/tests/test_modeling_tapas.py
@@ -540,9 +540,6 @@ def prepare_tapas_batch_inputs_for_training():
     return table, queries, answer_coordinates, answer_text, float_answer
 
 
-TOLERANCE = 1
-
-
 @require_torch
 @require_scatter
 class TapasModelIntegrationTest(unittest.TestCase):
@@ -574,12 +571,12 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
 
-        self.assertTrue(torch.allclose(outputs.last_hidden_state[:, :3, :3], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(outputs.last_hidden_state[:, :3, :3], expected_slice, atol=0.0005))
 
         # test the pooled output
         expected_slice = torch.tensor([[0.987518311, -0.970520139, -0.994303405]], device=torch_device)
 
-        self.assertTrue(torch.allclose(outputs.pooler_output[:, :3], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(outputs.pooler_output[:, :3], expected_slice, atol=0.0005))
 
     @unittest.skip(reason="Model not available yet")
     def test_inference_masked_lm(self):
@@ -634,7 +631,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
 
-        self.assertTrue(torch.allclose(logits, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits, expected_tensor, atol=0.015))
 
     @slow
     def test_inference_question_answering_head_conversational_absolute_embeddings(self):
@@ -683,7 +680,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
 
-        self.assertTrue(torch.allclose(logits, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits, expected_tensor, atol=0.01))
 
     @slow
     def test_inference_question_answering_head_weak_supervision(self):
@@ -710,7 +707,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
 
-        self.assertTrue(torch.allclose(logits[:, -6:], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits[:, -6:], expected_slice, atol=0.4))
 
         # test the aggregation logits
         logits_aggregation = outputs.logits_aggregation
@@ -721,7 +718,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
 
-        self.assertTrue(torch.allclose(logits_aggregation, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits_aggregation, expected_tensor, atol=0.001))
 
         # test the predicted answer coordinates and aggregation indices
         EXPECTED_PREDICTED_ANSWER_COORDINATES = [[(0, 0)], [(1, 2)]]
@@ -778,7 +775,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
         # test the loss
         loss = outputs.loss
         expected_loss = torch.tensor(3.3527612686157227e-08, device=torch_device)
-        self.assertTrue(torch.allclose(loss, expected_loss, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(loss, expected_loss, atol=1e-6))
 
         # test the logits on the first example
         logits = outputs.logits
@@ -799,7 +796,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
 
-        self.assertTrue(torch.allclose(logits[0, -9:], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits[0, -9:], expected_slice, atol=1e-6))
 
         # test the aggregation logits on the second example
         logits_aggregation = outputs.logits_aggregation
@@ -807,7 +804,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
         self.assertEqual(logits_aggregation.shape, expected_shape)
         expected_slice = torch.tensor([-4.0538, 40.0304, -5.3554, 23.3965], device=torch_device)
 
-        self.assertTrue(torch.allclose(logits_aggregation[1, -4:], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits_aggregation[1, -4:], expected_slice, atol=1e-4))
 
     @slow
     def test_inference_question_answering_head_strong_supervision(self):
@@ -854,7 +851,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
 
-        self.assertTrue(torch.allclose(logits, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits, expected_tensor, atol=0.02))
 
         # test the aggregation logits
         logits_aggregation = outputs.logits_aggregation
@@ -864,7 +861,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             [[16.5659733, -3.06624889, -2.34152961, -0.970244825]], device=torch_device
         )  # PyTorch model outputs [[16.5679, -3.0668, -2.3442, -0.9674]]
 
-        self.assertTrue(torch.allclose(logits_aggregation, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits_aggregation, expected_tensor, atol=0.003))
 
     @slow
     def test_inference_classification_head(self):
@@ -885,7 +882,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             [[0.795137286, 9.5572]], device=torch_device
         )  # Note that the PyTorch model outputs [[0.8057, 9.5281]]
 
-        self.assertTrue(torch.allclose(outputs.logits, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(outputs.logits, expected_tensor, atol=0.05))
 
 
 # Below: tests for Tapas utilities which are defined in modeling_tapas.py.
diff --git a/tests/test_tokenization_tapas.py b/tests/test_tokenization_tapas.py
index 711656fe90..064be0d4b9 100644
--- a/tests/test_tokenization_tapas.py
+++ b/tests/test_tokenization_tapas.py
@@ -290,7 +290,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 
     @slow
     def test_sequence_builders(self):
-        tokenizer = self.tokenizer_class.from_pretrained("nielsr/tapas-base-finetuned-wtq")
+        tokenizer = self.tokenizer_class.from_pretrained("google/tapas-base-finetuned-wtq")
 
         empty_table = self.get_table(tokenizer, length=0)
         table = self.get_table(tokenizer, length=10)