Encode and Decode are back in the superclass. They now handle the special tokens for sentence pairs.

2019-08-08 18:20:32 -04:00
parent e367ac469c
commit 6c41a8f5dc
4 changed files with 81 additions and 79 deletions
--- a/pytorch_transformers/__init__.py
+++ b/pytorch_transformers/__init__.py
@@ -7,7 +7,6 @@ from .tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
 from .tokenization_xlm import XLMTokenizer
 from .tokenization_roberta import RobertaTokenizer
-from .tokenization_utils import (PreTrainedTokenizer, clean_up_tokenization)

 from .tokenization_utils import (PreTrainedTokenizer)

@@ -39,7 +38,7 @@ from .modeling_xlm import (XLMConfig, XLMPreTrainedModel , XLMModel,
                           XLMWithLMHeadModel, XLMForSequenceClassification,
                           XLMForQuestionAnswering, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
                           XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_roberta import (RobertaConfig, RobertaForMaskedLM, RobertaModel,
+from .modeling_roberta import (RobertaConfig, RobertaForMaskedLM, RobertaModel, RobertaForSequenceClassification,
                               ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
 from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
                          PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)