Fix tokenizers training in notebook (#10110)

This commit is contained in:
Anthony MOI
2021-02-09 21:48:22 -05:00
committed by GitHub
parent 85395e4901
commit 1fbaa3c117

View File

@@ -229,7 +229,7 @@
"\n", "\n",
"# We initialize our trainer, giving him the details about the vocabulary we want to generate\n", "# We initialize our trainer, giving him the details about the vocabulary we want to generate\n",
"trainer = BpeTrainer(vocab_size=25000, show_progress=True, initial_alphabet=ByteLevel.alphabet())\n", "trainer = BpeTrainer(vocab_size=25000, show_progress=True, initial_alphabet=ByteLevel.alphabet())\n",
"tokenizer.train(trainer, [\"big.txt\"])\n", "tokenizer.train(files=[\"big.txt\"], trainer=trainer)\n",
"\n", "\n",
"print(\"Trained vocab size: {}\".format(tokenizer.get_vocab_size()))" "print(\"Trained vocab size: {}\".format(tokenizer.get_vocab_size()))"
] ]