[Docs] Fix spelling and grammar mistakes (#28825)
* Fix typos and grammar mistakes in docs and examples
* Fix typos in docstrings and comments
* Fix spelling of `tokenizer` in model tests
* Remove erroneous spaces in decorators
* Remove extra spaces in Markdown link texts
This commit is contained in:
@@ -166,7 +166,7 @@ class ByT5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(expected_src_tokens, batch["input_ids"][0])
|
||||
self.assertEqual(expected_tgt_tokens, batch["labels"][0])
|
||||
|
||||
# cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
|
||||
# cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
|
||||
def test_save_and_load_tokenizer(self):
|
||||
# safety check on max_len default value so we are sure the test works
|
||||
tokenizers = self.get_tokenizers()
|
||||
|
||||
@@ -82,7 +82,7 @@ class CanineTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(32, targets["input_ids"].shape[1])
|
||||
|
||||
# cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
|
||||
# cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
|
||||
def test_save_and_load_tokenizer(self):
|
||||
# safety check on max_len default value so we are sure the test works
|
||||
tokenizers = self.get_tokenizers()
|
||||
|
||||
@@ -367,10 +367,10 @@ class LlamaIntegrationTest(unittest.TestCase):
|
||||
fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
|
||||
assert fast == [319, 4559, 1243, 2]
|
||||
|
||||
slow_tokenzier = CodeLlamaTokenizer.from_pretrained(
|
||||
slow_tokenizer = CodeLlamaTokenizer.from_pretrained(
|
||||
"hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
|
||||
)
|
||||
slow = slow_tokenzier.encode("A sample test", add_special_tokens=True)
|
||||
slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
|
||||
assert slow == [319, 4559, 1243, 2]
|
||||
|
||||
self.tokenizer.add_eos_token = False
|
||||
|
||||
@@ -360,10 +360,10 @@ class LlamaIntegrationTest(unittest.TestCase):
|
||||
fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
|
||||
assert fast == [319, 4559, 1243, 2]
|
||||
|
||||
slow_tokenzier = LlamaTokenizer.from_pretrained(
|
||||
slow_tokenizer = LlamaTokenizer.from_pretrained(
|
||||
"hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
|
||||
)
|
||||
slow = slow_tokenzier.encode("A sample test", add_special_tokens=True)
|
||||
slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
|
||||
assert slow == [319, 4559, 1243, 2]
|
||||
|
||||
self.tokenizer.add_eos_token = False
|
||||
|
||||
@@ -148,7 +148,7 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(32, targets["input_ids"].shape[1])
|
||||
|
||||
# cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
|
||||
# cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
|
||||
def test_save_and_load_tokenizer(self):
|
||||
# safety check on max_len default value so we are sure the test works
|
||||
tokenizers = self.get_tokenizers()
|
||||
|
||||
@@ -158,7 +158,7 @@ class Qwen2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(tokenizer_output_string, output_string)
|
||||
|
||||
def test_slow_tokenizer_decode_spaces_between_special_tokens_default(self):
|
||||
# Qwen2Tokenzier changes the default `spaces_between_special_tokens` in `decode` to False
|
||||
# Qwen2Tokenizer changes the default `spaces_between_special_tokens` in `decode` to False
|
||||
if not self.test_slow_tokenizer:
|
||||
return
|
||||
|
||||
|
||||
Reference in New Issue
Block a user