[Docs] Fix spelling and grammar mistakes (#28825)

* Fix typos and grammar mistakes in docs and examples

* Fix typos in docstrings and comments

* Fix spelling of `tokenizer` in model tests

* Remove erroneous spaces in decorators

* Remove extra spaces in Markdown link texts
This commit is contained in:
Klaus Hipp
2024-02-02 08:45:00 +01:00
committed by GitHub
parent 2418c64a1c
commit 721ee783ca
134 changed files with 185 additions and 186 deletions

View File

@@ -166,7 +166,7 @@ class ByT5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(expected_src_tokens, batch["input_ids"][0])
self.assertEqual(expected_tgt_tokens, batch["labels"][0])
# cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
# cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
def test_save_and_load_tokenizer(self):
# safety check on max_len default value so we are sure the test works
tokenizers = self.get_tokenizers()

View File

@@ -82,7 +82,7 @@ class CanineTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
)
self.assertEqual(32, targets["input_ids"].shape[1])
# cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
# cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
def test_save_and_load_tokenizer(self):
# safety check on max_len default value so we are sure the test works
tokenizers = self.get_tokenizers()

View File

@@ -367,10 +367,10 @@ class LlamaIntegrationTest(unittest.TestCase):
fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
assert fast == [319, 4559, 1243, 2]
slow_tokenzier = CodeLlamaTokenizer.from_pretrained(
slow_tokenizer = CodeLlamaTokenizer.from_pretrained(
"hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
)
slow = slow_tokenzier.encode("A sample test", add_special_tokens=True)
slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
assert slow == [319, 4559, 1243, 2]
self.tokenizer.add_eos_token = False

View File

@@ -360,10 +360,10 @@ class LlamaIntegrationTest(unittest.TestCase):
fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
assert fast == [319, 4559, 1243, 2]
slow_tokenzier = LlamaTokenizer.from_pretrained(
slow_tokenizer = LlamaTokenizer.from_pretrained(
"hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
)
slow = slow_tokenzier.encode("A sample test", add_special_tokens=True)
slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
assert slow == [319, 4559, 1243, 2]
self.tokenizer.add_eos_token = False

View File

@@ -148,7 +148,7 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
)
self.assertEqual(32, targets["input_ids"].shape[1])
# cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
# cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
def test_save_and_load_tokenizer(self):
# safety check on max_len default value so we are sure the test works
tokenizers = self.get_tokenizers()

View File

@@ -158,7 +158,7 @@ class Qwen2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(tokenizer_output_string, output_string)
def test_slow_tokenizer_decode_spaces_between_special_tokens_default(self):
# Qwen2Tokenzier changes the default `spaces_between_special_tokens` in `decode` to False
# Qwen2Tokenizer changes the default `spaces_between_special_tokens` in `decode` to False
if not self.test_slow_tokenizer:
return