Enforce string-formatting with f-strings (#10980)
* First third * Styling and fix mistake * Quality * All the rest * Treat %s and %d * typo * Missing ) * Apply suggestions from code review Co-authored-by: Lysandre Debut <lysandre@huggingface.co> Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
@@ -86,6 +86,6 @@ if __name__ == "__main__":
|
||||
end_train_time = time.time() - start_train_time
|
||||
|
||||
logger.info("*** Train ***")
|
||||
logger.info("train_runtime = %s", end_train_time)
|
||||
logger.info(f"train_runtime = {end_train_time}")
|
||||
for key, value in train_results.history.items():
|
||||
logger.info(" %s = %s", key, value)
|
||||
logger.info(f" {key} = {value}")
|
||||
|
||||
@@ -157,7 +157,7 @@ if __name__ == "__main__":
|
||||
)
|
||||
end_train_time = time.time() - start_train_time
|
||||
logger.info("*** Train ***")
|
||||
logger.info("train_runtime = %s", end_train_time)
|
||||
logger.info(f"train_runtime = {end_train_time}")
|
||||
|
||||
output_eval_file = os.path.join(args.output_dir, "train_results.txt")
|
||||
|
||||
@@ -166,8 +166,8 @@ if __name__ == "__main__":
|
||||
logger.info("***** Train results *****")
|
||||
logger.info(train_results)
|
||||
for key, value in train_results.items():
|
||||
logger.info(" %s = %s", key, value)
|
||||
writer.write("%s = %s\n" % (key, value))
|
||||
logger.info(f" {key} = {value}")
|
||||
writer.write(f"{key} = {value}\n")
|
||||
|
||||
# Evaluation
|
||||
if args.do_eval and (not SDP_ENABLED or sdp.rank() == 0):
|
||||
@@ -181,8 +181,8 @@ if __name__ == "__main__":
|
||||
logger.info("***** Eval results *****")
|
||||
logger.info(result)
|
||||
for key, value in result.items():
|
||||
logger.info(" %s = %s", key, value)
|
||||
writer.write("%s = %s\n" % (key, value))
|
||||
logger.info(f" {key} = {value}")
|
||||
writer.write(f"{key} = {value}\n")
|
||||
|
||||
# Save result
|
||||
if SDP_ENABLED:
|
||||
|
||||
@@ -31,8 +31,8 @@ PASS = "__DUMMY_TRANSFORMERS_PASS__"
|
||||
ENDPOINT_STAGING = "https://moon-staging.huggingface.co"
|
||||
ENDPOINT_STAGING_BASIC_AUTH = f"https://{USER}:{PASS}@moon-staging.huggingface.co"
|
||||
|
||||
REPO_NAME = "my-model-{}".format(int(time.time()))
|
||||
REPO_NAME_LARGE_FILE = "my-model-largefiles-{}".format(int(time.time()))
|
||||
REPO_NAME = f"my-model-{int(time.time())}"
|
||||
REPO_NAME_LARGE_FILE = f"my-model-largefiles-{int(time.time())}"
|
||||
WORKING_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/working_repo")
|
||||
LARGE_FILE_14MB = "https://cdn-media.huggingface.co/lfs-largefiles/progit.epub"
|
||||
LARGE_FILE_18MB = "https://cdn-media.huggingface.co/lfs-largefiles/progit.pdf"
|
||||
@@ -95,7 +95,7 @@ class HfFolderTest(unittest.TestCase):
|
||||
Test the whole token save/get/delete workflow,
|
||||
with the desired behavior with respect to non-existent tokens.
|
||||
"""
|
||||
token = "token-{}".format(int(time.time()))
|
||||
token = f"token-{int(time.time())}"
|
||||
HfFolder.save_token(token)
|
||||
self.assertEqual(HfFolder.get_token(), token)
|
||||
HfFolder.delete_token()
|
||||
|
||||
@@ -172,7 +172,7 @@ class ModelTesterMixin:
|
||||
self.assertIn(
|
||||
((param.data.mean() * 1e9).round() / 1e9).item(),
|
||||
[0.0, 1.0],
|
||||
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
|
||||
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
||||
)
|
||||
|
||||
def test_determinism(self):
|
||||
@@ -928,7 +928,7 @@ class ModelTesterMixin:
|
||||
model.base_model.save_pretrained(temp_dir_name)
|
||||
model, loading_info = model_class.from_pretrained(temp_dir_name, output_loading_info=True)
|
||||
|
||||
with self.subTest(msg="Missing keys for {}".format(model.__class__.__name__)):
|
||||
with self.subTest(msg=f"Missing keys for {model.__class__.__name__}"):
|
||||
self.assertGreater(len(loading_info["missing_keys"]), 0)
|
||||
|
||||
def test_tie_model_weights(self):
|
||||
|
||||
@@ -365,10 +365,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
|
||||
@@ -74,7 +74,7 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
msg = f"{a} != {b}"
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
|
||||
@@ -195,8 +195,6 @@ class TFAutoModelTest(unittest.TestCase):
|
||||
mapping = tuple(mapping.items())
|
||||
for index, (child_config, child_model) in enumerate(mapping[1:]):
|
||||
for parent_config, parent_model in mapping[: index + 1]:
|
||||
with self.subTest(
|
||||
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
|
||||
):
|
||||
with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
|
||||
self.assertFalse(issubclass(child_config, parent_config))
|
||||
self.assertFalse(issubclass(child_model, parent_model))
|
||||
|
||||
@@ -289,10 +289,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
|
||||
@@ -287,10 +287,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
|
||||
@@ -289,10 +289,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
|
||||
@@ -380,10 +380,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
|
||||
@@ -320,10 +320,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
|
||||
@@ -291,10 +291,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
|
||||
@@ -318,10 +318,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
if len(prefix) > 0:
|
||||
prefix = f"{prefix}: "
|
||||
raise AssertionError(f"{prefix}{a} != {b}")
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
|
||||
@@ -320,13 +320,13 @@ class Wav2Vec2ModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
if "conv.weight" in name or "masked_spec_embed" in name:
|
||||
self.assertTrue(
|
||||
-1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
|
||||
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
|
||||
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
||||
)
|
||||
else:
|
||||
self.assertIn(
|
||||
((param.data.mean() * 1e9).round() / 1e9).item(),
|
||||
[0.0, 1.0],
|
||||
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
|
||||
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
||||
)
|
||||
|
||||
@slow
|
||||
@@ -437,13 +437,13 @@ class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
if "conv.weight" in name or "masked_spec_embed" in name:
|
||||
self.assertTrue(
|
||||
-1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
|
||||
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
|
||||
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
||||
)
|
||||
else:
|
||||
self.assertIn(
|
||||
((param.data.mean() * 1e9).round() / 1e9).item(),
|
||||
[0.0, 1.0],
|
||||
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
|
||||
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
||||
)
|
||||
|
||||
@slow
|
||||
|
||||
@@ -101,9 +101,7 @@ class AutoTokenizerTest(unittest.TestCase):
|
||||
mapping = tuple(mapping.items())
|
||||
for index, (child_config, _) in enumerate(mapping[1:]):
|
||||
for parent_config, _ in mapping[: index + 1]:
|
||||
with self.subTest(
|
||||
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
|
||||
):
|
||||
with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
|
||||
self.assertFalse(issubclass(child_config, parent_config))
|
||||
|
||||
@require_tokenizers
|
||||
|
||||
@@ -154,7 +154,7 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_embeded_special_tokens(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
sentence = "A, <mask> AllenNLP sentence."
|
||||
|
||||
@@ -250,7 +250,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_offsets_with_special_characters(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence."
|
||||
|
||||
@@ -38,7 +38,7 @@ class BertweetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
|
||||
with open(self.vocab_file, "w", encoding="utf-8") as fp:
|
||||
for token in vocab_tokens:
|
||||
fp.write("{} {}".format(token, vocab_tokens[token]) + "\n")
|
||||
fp.write(f"{token} {vocab_tokens[token]}\n")
|
||||
with open(self.merges_file, "w", encoding="utf-8") as fp:
|
||||
fp.write("\n".join(merges))
|
||||
|
||||
|
||||
@@ -1216,18 +1216,18 @@ class TokenizerTesterMixin:
|
||||
empty_tokens = tokenizer("", padding=True, pad_to_multiple_of=8)
|
||||
normal_tokens = tokenizer("This is a sample input", padding=True, pad_to_multiple_of=8)
|
||||
for key, value in empty_tokens.items():
|
||||
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
|
||||
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
|
||||
for key, value in normal_tokens.items():
|
||||
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
|
||||
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
|
||||
|
||||
normal_tokens = tokenizer("This", pad_to_multiple_of=8)
|
||||
for key, value in normal_tokens.items():
|
||||
self.assertNotEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
|
||||
self.assertNotEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
|
||||
|
||||
# Should also work with truncation
|
||||
normal_tokens = tokenizer("This", padding=True, truncation=True, pad_to_multiple_of=8)
|
||||
for key, value in normal_tokens.items():
|
||||
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
|
||||
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
|
||||
|
||||
# truncation to something which is not a multiple of pad_to_multiple_of raises an error
|
||||
self.assertRaises(
|
||||
@@ -1897,7 +1897,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_is_fast(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
@@ -1907,7 +1907,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_fast_only_inputs(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
# Ensure None raise an error
|
||||
@@ -1918,7 +1918,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_alignement_methods(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
words = ["Wonderful", "no", "inspiration", "example", "with", "subtoken"]
|
||||
@@ -2144,7 +2144,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_tokenization_python_rust_equals(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
@@ -2181,7 +2181,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_num_special_tokens_to_add_equal(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
@@ -2195,7 +2195,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_max_length_equal(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
@@ -2205,7 +2205,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_special_tokens_map_equal(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
@@ -2217,7 +2217,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_add_tokens(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
vocab_size = len(tokenizer_r)
|
||||
@@ -2239,7 +2239,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_offsets_mapping(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
text = "Wonderful no inspiration example with subtoken"
|
||||
@@ -2285,9 +2285,7 @@ class TokenizerTesterMixin:
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
tokenizer = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
with self.subTest(
|
||||
"{} ({}, {})".format(tokenizer.__class__.__name__, pretrained_name, tokenizer.__class__.__name__)
|
||||
):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name}, {tokenizer.__class__.__name__})"):
|
||||
|
||||
if is_torch_available():
|
||||
returned_tensor = "pt"
|
||||
@@ -2341,7 +2339,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_compare_pretokenized_inputs(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
@@ -2419,7 +2417,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_create_token_type_ids(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
input_simple = [1, 2, 3]
|
||||
@@ -2437,7 +2435,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_build_inputs_with_special_tokens(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
# # Input string
|
||||
@@ -2470,7 +2468,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_padding(self, max_length=50):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
@@ -2688,7 +2686,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_padding_different_model_input_name(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
self.assertEqual(tokenizer_p.pad_token_id, tokenizer_r.pad_token_id)
|
||||
@@ -2722,7 +2720,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_save_pretrained(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
@@ -2747,7 +2745,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_embeded_special_tokens(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
sentence = "A, <mask> AllenNLP sentence."
|
||||
@@ -2772,7 +2770,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_compare_add_special_tokens(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
simple_num_special_tokens_to_add = tokenizer_r.num_special_tokens_to_add(pair=False)
|
||||
@@ -2811,7 +2809,7 @@ class TokenizerTesterMixin:
|
||||
|
||||
def test_compare_prepare_for_model(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
string_sequence = "Asserting that both tokenizers are equal"
|
||||
|
||||
@@ -133,7 +133,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_padding(self, max_length=15):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
# Simple input
|
||||
|
||||
@@ -87,7 +87,7 @@ class OpenAIGPTTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_padding(self, max_length=15):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
# Simple input
|
||||
|
||||
@@ -39,7 +39,7 @@ class PhobertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
with open(self.vocab_file, "w", encoding="utf-8") as fp:
|
||||
for token in vocab_tokens:
|
||||
fp.write("{} {}".format(token, vocab_tokens[token]) + "\n")
|
||||
fp.write(f"{token} {vocab_tokens[token]}\n")
|
||||
with open(self.merges_file, "w", encoding="utf-8") as fp:
|
||||
fp.write("\n".join(merges))
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ class ReformerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_padding(self, max_length=15):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
# Simple input
|
||||
|
||||
@@ -167,7 +167,7 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_embeded_special_tokens(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
sentence = "A, <mask> AllenNLP sentence."
|
||||
|
||||
@@ -312,7 +312,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_offsets_with_special_characters(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence."
|
||||
@@ -807,18 +807,18 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
empty_tokens = tokenizer(table, padding=True, pad_to_multiple_of=8)
|
||||
normal_tokens = tokenizer(table, "This is a sample input", padding=True, pad_to_multiple_of=8)
|
||||
for key, value in empty_tokens.items():
|
||||
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
|
||||
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
|
||||
for key, value in normal_tokens.items():
|
||||
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
|
||||
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
|
||||
|
||||
normal_tokens = tokenizer(table, "This", pad_to_multiple_of=8)
|
||||
for key, value in normal_tokens.items():
|
||||
self.assertNotEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
|
||||
self.assertNotEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
|
||||
|
||||
# Should also work with truncation
|
||||
normal_tokens = tokenizer(table, "This", padding=True, truncation=True, pad_to_multiple_of=8)
|
||||
for key, value in normal_tokens.items():
|
||||
self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key))
|
||||
self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8")
|
||||
|
||||
@unittest.skip("TAPAS cannot handle `prepare_for_model` without passing by `encode_plus` or `batch_encode_plus`")
|
||||
def test_prepare_for_model(self):
|
||||
|
||||
@@ -82,11 +82,8 @@ if __name__ == "__main__":
|
||||
training_args = parser.parse_args_into_dataclasses()[0]
|
||||
|
||||
logger.warning(
|
||||
"Process rank: %s, device: %s, n_gpu: %s, distributed training: %s",
|
||||
training_args.local_rank,
|
||||
training_args.device,
|
||||
training_args.n_gpu,
|
||||
training_args.local_rank != -1,
|
||||
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
|
||||
f"distributed training: {training_args.local_rank != -1}"
|
||||
)
|
||||
|
||||
# Essentially, what we want to verify in the distributed case is that we get all samples back,
|
||||
|
||||
@@ -69,10 +69,8 @@ def main():
|
||||
training_args = parser.parse_args_into_dataclasses()[0]
|
||||
|
||||
logger.warning(
|
||||
"Process rank: %s, device: %s, tpu_num_cores: %s",
|
||||
training_args.local_rank,
|
||||
training_args.device,
|
||||
training_args.tpu_num_cores,
|
||||
f"Process rank: {training_args.local_rank}, device: {training_args.device}, "
|
||||
f"tpu_num_cores: {training_args.tpu_num_cores}",
|
||||
)
|
||||
|
||||
# Essentially, what we want to verify in the distributed case is
|
||||
|
||||
Reference in New Issue
Block a user