[tests] remove pt_tf equivalence tests (#36253)

This commit is contained in:
Joao Gante
2025-02-19 11:55:11 +00:00
committed by GitHub
parent 1a81d774b1
commit 0863eef248
60 changed files with 56 additions and 2438 deletions

View File

@@ -41,7 +41,6 @@ from transformers.models.layoutlmv2.tokenization_layoutlmv2 import (
_is_whitespace,
)
from transformers.testing_utils import (
is_pt_tf_cross_test,
require_detectron2,
require_pandas,
require_tokenizers,
@@ -1497,48 +1496,6 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertListEqual(new_encoded_inputs, dropped_encoded_inputs)
self.assertLessEqual(len(new_encoded_inputs), 20)
@is_pt_tf_cross_test
def test_batch_encode_plus_tensors(self):
tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"):
words, boxes = self.get_words_and_boxes_batch()
# A Tensor cannot be build by sequences which are not the same size
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes=boxes, return_tensors="pt")
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes=boxes, return_tensors="tf")
if tokenizer.pad_token_id is None:
self.assertRaises(
ValueError,
tokenizer.batch_encode_plus,
words,
boxes=boxes,
padding=True,
return_tensors="pt",
)
self.assertRaises(
ValueError,
tokenizer.batch_encode_plus,
words,
boxes=boxes,
padding="longest",
return_tensors="tf",
)
else:
pytorch_tensor = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True, return_tensors="pt")
tensorflow_tensor = tokenizer.batch_encode_plus(
words, boxes=boxes, padding="longest", return_tensors="tf"
)
encoded_sequences = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True)
for key in encoded_sequences.keys():
pytorch_value = pytorch_tensor[key].tolist()
tensorflow_value = tensorflow_tensor[key].numpy().tolist()
encoded_value = encoded_sequences[key]
self.assertEqual(pytorch_value, tensorflow_value, encoded_value)
def test_sequence_ids(self):
tokenizers = self.get_tokenizers()
for tokenizer in tokenizers: