fixed GPT-2 tokenization on python 2
This commit is contained in:
@@ -32,7 +32,7 @@ class OpenAIGPTTokenizationTest(unittest.TestCase):
|
||||
vocab_tokens = dict(zip(vocab, range(len(vocab))))
|
||||
merges = ["#version: 0.2", "l o", "lo w", "e r</w>", ""]
|
||||
with open("/tmp/openai_tokenizer_vocab_test.json", "w") as fp:
|
||||
json.dump(vocab_tokens, fp)
|
||||
fp.write(json.dumps(vocab_tokens))
|
||||
vocab_file = fp.name
|
||||
with open("/tmp/openai_tokenizer_merges_test.txt", "w") as fp:
|
||||
fp.write("\n".join(merges))
|
||||
|
||||
Reference in New Issue
Block a user