Fixed GPT-2 tokenization on Python 2

This commit is contained in:
thomwolf
2019-04-17 10:56:15 +02:00
parent bdaba1897c
commit bc70779bf0
4 changed files with 7 additions and 5 deletions

View File

@@ -32,7 +32,7 @@ class OpenAIGPTTokenizationTest(unittest.TestCase):
vocab_tokens = dict(zip(vocab, range(len(vocab))))
merges = ["#version: 0.2", "l o", "lo w", "e r</w>", ""]
with open("/tmp/openai_tokenizer_vocab_test.json", "w") as fp:
-json.dump(vocab_tokens, fp)
+fp.write(json.dumps(vocab_tokens))
vocab_file = fp.name
with open("/tmp/openai_tokenizer_merges_test.txt", "w") as fp:
fp.write("\n".join(merges))