Fix tokenizers training in notebook (#10110)
This commit is contained in:
@@ -229,7 +229,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# We initialize our trainer, giving him the details about the vocabulary we want to generate\n",
|
"# We initialize our trainer, giving him the details about the vocabulary we want to generate\n",
|
||||||
"trainer = BpeTrainer(vocab_size=25000, show_progress=True, initial_alphabet=ByteLevel.alphabet())\n",
|
"trainer = BpeTrainer(vocab_size=25000, show_progress=True, initial_alphabet=ByteLevel.alphabet())\n",
|
||||||
"tokenizer.train(trainer, [\"big.txt\"])\n",
|
"tokenizer.train(files=[\"big.txt\"], trainer=trainer)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Trained vocab size: {}\".format(tokenizer.get_vocab_size()))"
|
"print(\"Trained vocab size: {}\".format(tokenizer.get_vocab_size()))"
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user