fix
Strip the trailing '\n' from each token before adding it to the vocab
This commit is contained in:
@@ -69,6 +69,7 @@ def load_vocab(vocab_file):
|
|||||||
with open(vocab_file, "r", encoding="utf-8") as reader:
|
with open(vocab_file, "r", encoding="utf-8") as reader:
|
||||||
tokens = reader.readlines()
|
tokens = reader.readlines()
|
||||||
for index, token in enumerate(tokens):
|
for index, token in enumerate(tokens):
|
||||||
|
token = token[:-1]
|
||||||
vocab[token] = index
|
vocab[token] = index
|
||||||
return vocab
|
return vocab
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user