fix python2 tests
This commit is contained in:
@@ -12,9 +12,7 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from __future__ import division
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
@@ -47,7 +45,7 @@ def create_and_check_save_and_load_tokenizer(tester, tokenizer_class, *inputs, *
|
|||||||
def create_and_check_pickle_tokenizer(tester, tokenizer_class, *inputs, **kwargs):
|
def create_and_check_pickle_tokenizer(tester, tokenizer_class, *inputs, **kwargs):
|
||||||
tokenizer = tokenizer_class(*inputs, **kwargs)
|
tokenizer = tokenizer_class(*inputs, **kwargs)
|
||||||
|
|
||||||
text = "Munich and Berlin are nice cities"
|
text = u"Munich and Berlin are nice cities"
|
||||||
filename = u"/tmp/tokenizer.bin"
|
filename = u"/tmp/tokenizer.bin"
|
||||||
|
|
||||||
subwords = tokenizer.tokenize(text)
|
subwords = tokenizer.tokenize(text)
|
||||||
|
|||||||
@@ -101,8 +101,12 @@ class PreTrainedTokenizer(object):
|
|||||||
max_len = cls.max_model_input_sizes[pretrained_model_name_or_path]
|
max_len = cls.max_model_input_sizes[pretrained_model_name_or_path]
|
||||||
kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
|
kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
|
||||||
|
|
||||||
|
# Merge resolved_vocab_files arguments in kwargs.
|
||||||
|
for args_name, file_path in resolved_vocab_files.items():
|
||||||
|
kwargs[args_name] = file_path
|
||||||
|
|
||||||
# Instantiate tokenizer.
|
# Instantiate tokenizer.
|
||||||
tokenizer = cls(*inputs, **resolved_vocab_files, **kwargs)
|
tokenizer = cls(*inputs, **kwargs)
|
||||||
|
|
||||||
return tokenizer
|
return tokenizer
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user