fix for python2
This commit is contained in:
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
import os
|
import os
|
||||||
import unittest
|
import unittest
|
||||||
import json
|
import json
|
||||||
|
from io import open
|
||||||
|
|
||||||
from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer, VOCAB_FILES_NAMES
|
from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer, VOCAB_FILES_NAMES
|
||||||
|
|
||||||
@@ -55,8 +56,8 @@ class GPT2TokenizationTest(CommonTestCases.CommonTokenizerTester):
|
|||||||
|
|
||||||
def test_full_tokenizer(self):
|
def test_full_tokenizer(self):
|
||||||
tokenizer = GPT2Tokenizer(self.vocab_file, self.merges_file, **self.special_tokens_map)
|
tokenizer = GPT2Tokenizer(self.vocab_file, self.merges_file, **self.special_tokens_map)
|
||||||
text = "lower"
|
text = "lower newer"
|
||||||
bpe_tokens = ["\u0120low", "er"]
|
bpe_tokens = ["\u0120low", "er", "\u0120newer"]
|
||||||
tokens = tokenizer.tokenize(text)
|
tokens = tokenizer.tokenize(text)
|
||||||
self.assertListEqual(tokens, bpe_tokens)
|
self.assertListEqual(tokens, bpe_tokens)
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import unittest
|
import unittest
|
||||||
|
from io import open
|
||||||
|
|
||||||
from pytorch_transformers.tokenization_roberta import RobertaTokenizer, VOCAB_FILES_NAMES
|
from pytorch_transformers.tokenization_roberta import RobertaTokenizer, VOCAB_FILES_NAMES
|
||||||
from .tokenization_tests_commons import CommonTestCases
|
from .tokenization_tests_commons import CommonTestCases
|
||||||
|
|||||||
Reference in New Issue
Block a user