Remove the trailing '\n' from each token before adding it to the vocab
This commit is contained in:
Yiqing-Zhou
2019-07-22 22:30:49 +08:00
committed by GitHub
parent 897d0841be
commit bef0c629ca

View File

@@ -69,6 +69,7 @@ def load_vocab(vocab_file):
with open(vocab_file, "r", encoding="utf-8") as reader: with open(vocab_file, "r", encoding="utf-8") as reader:
tokens = reader.readlines() tokens = reader.readlines()
for index, token in enumerate(tokens): for index, token in enumerate(tokens):
token = token[:-1]
vocab[token] = index vocab[token] = index
return vocab return vocab