From cb9db101c744276a5028f5b8c675c35536f2096f Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Sun, 4 Aug 2019 22:04:15 -0400 Subject: [PATCH] Python 2 must DIE --- pytorch_transformers/modeling_roberta.py | 6 +++--- .../tests/tokenization_roberta_test.py | 10 ++++++---- pytorch_transformers/tokenization_roberta.py | 4 +++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pytorch_transformers/modeling_roberta.py b/pytorch_transformers/modeling_roberta.py index b92ffd0433..109a719616 100644 --- a/pytorch_transformers/modeling_roberta.py +++ b/pytorch_transformers/modeling_roberta.py @@ -58,7 +58,7 @@ class RobertaEmbeddings(BertEmbeddings): # cf. fairseq's `utils.make_positions` position_ids = torch.arange(self.padding_idx+1, seq_length+self.padding_idx+1, dtype=torch.long, device=input_ids.device) position_ids = position_ids.unsqueeze(0).expand_as(input_ids) - return super().forward(input_ids, token_type_ids=token_type_ids, position_ids=position_ids) + return super(RobertaEmbeddings, self).forward(input_ids, token_type_ids=token_type_ids, position_ids=position_ids) class RobertaConfig(BertConfig): @@ -109,8 +109,8 @@ class RobertaForMaskedLM(BertPreTrainedModel): class RobertaLMHead(nn.Module): """Roberta Head for masked language modeling.""" - def __init__(self, config: BertConfig): - super().__init__() + def __init__(self, config): + super(RobertaLMHead, self).__init__() self.dense = nn.Linear(config.hidden_size, config.hidden_size) self.layer_norm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps) diff --git a/pytorch_transformers/tests/tokenization_roberta_test.py b/pytorch_transformers/tests/tokenization_roberta_test.py index 01268f7d25..cd4e17ec34 100644 --- a/pytorch_transformers/tests/tokenization_roberta_test.py +++ b/pytorch_transformers/tests/tokenization_roberta_test.py @@ -18,6 +18,7 @@ from __future__ import (absolute_import, division, print_function, import os import unittest import pytest +import six from pytorch_transformers.tokenization_roberta import RobertaTokenizer @@ -31,10 +32,11 @@ class RobertaTokenizationTest(unittest.TestCase): tokenizer.encode('Hello world!'), [0, 31414, 232, 328, 2] ) - self.assertListEqual( - tokenizer.encode('Hello world! cécé herlolip'), - [0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2] - ) + if six.PY3: + self.assertListEqual( + tokenizer.encode('Hello world! cécé herlolip'), + [0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2] + ) diff --git a/pytorch_transformers/tokenization_roberta.py b/pytorch_transformers/tokenization_roberta.py index 92717c6dd1..4f9a7bc0fa 100644 --- a/pytorch_transformers/tokenization_roberta.py +++ b/pytorch_transformers/tokenization_roberta.py @@ -19,6 +19,8 @@ from __future__ import (absolute_import, division, print_function, import json import logging import re +from io import open +import six from .tokenization_utils import PreTrainedTokenizer from .tokenization_gpt2 import GPT2Tokenizer @@ -125,7 +127,7 @@ class Dictionary(object): Loads a pre-existing dictionary from a text file and adds its symbols to this instance. """ - if isinstance(f, str): + if isinstance(f, six.string_types): try: if not ignore_utf_errors: with open(f, 'r', encoding='utf-8') as fd: