From ca1a00a302c6aff525d949d398ee6bfe42e3e194 Mon Sep 17 00:00:00 2001
From: thomwolf
Date: Fri, 30 Aug 2019 12:29:31 +0200
Subject: [PATCH] fix for python2

---
 pytorch_transformers/tests/tokenization_gpt2_test.py | 5 +++--
 pytorch_transformers/tests/tokenization_roberta_test.py | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pytorch_transformers/tests/tokenization_gpt2_test.py b/pytorch_transformers/tests/tokenization_gpt2_test.py
index 8ba3be7e5d..3e4fb5bc1d 100644
--- a/pytorch_transformers/tests/tokenization_gpt2_test.py
+++ b/pytorch_transformers/tests/tokenization_gpt2_test.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import os
 import unittest
 import json
+from io import open
 
 from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer, VOCAB_FILES_NAMES
 
@@ -55,8 +56,8 @@ class GPT2TokenizationTest(CommonTestCases.CommonTokenizerTester):
 
     def test_full_tokenizer(self):
         tokenizer = GPT2Tokenizer(self.vocab_file, self.merges_file, **self.special_tokens_map)
-        text = "lower"
-        bpe_tokens = ["\u0120low", "er"]
+        text = "lower newer"
+        bpe_tokens = ["\u0120low", "er", "\u0120newer"]
         tokens = tokenizer.tokenize(text)
         self.assertListEqual(tokens, bpe_tokens)
 
diff --git a/pytorch_transformers/tests/tokenization_roberta_test.py b/pytorch_transformers/tests/tokenization_roberta_test.py
index 960a91a5e1..e2082e7613 100644
--- a/pytorch_transformers/tests/tokenization_roberta_test.py
+++ b/pytorch_transformers/tests/tokenization_roberta_test.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import os
 import json
 import unittest
+from io import open
 
 from pytorch_transformers.tokenization_roberta import RobertaTokenizer, VOCAB_FILES_NAMES
 from .tokenization_tests_commons import CommonTestCases