Merge branch 'master' into tf2

This commit is contained in:
thomwolf
2019-09-26 12:02:54 +02:00
10 changed files with 146 additions and 70 deletions

View File

@@ -36,7 +36,6 @@ class OpenAIGPTConfig(PretrainedConfig):
Args:
vocab_size_or_config_json_file: Vocabulary size of `input_ids` in `OpenAIGPTModel` or a configuration json file.
n_special: The number of special tokens to learn during fine-tuning ('[SEP]', '[CLF]', ...)
n_positions: Number of positional embeddings.
n_ctx: Size of the causal mask (usually same as n_positions).
n_embd: Dimensionality of the embeddings and hidden states.

View File

@@ -183,8 +183,8 @@ class XLNetTokenizer(PreTrainedTokenizer):
def add_special_tokens_single_sequence(self, token_ids):
"""
- Adds special tokens to a sequence pair for sequence classification tasks.
- An XLNet sequence pair has the following format: A [SEP] B [SEP][CLS]
+ Adds special tokens to a sequence for sequence classification tasks.
+ An XLNet sequence has the following format: X [SEP][CLS]
"""
sep = [self.sep_token_id]
cls = [self.cls_token_id]
@@ -192,8 +192,8 @@ class XLNetTokenizer(PreTrainedTokenizer):
def add_special_tokens_sequence_pair(self, token_ids_0, token_ids_1):
"""
- Adds special tokens to a sequence for sequence classification tasks.
- An XLNet sequence has the following format: X [SEP][CLS]
+ Adds special tokens to a sequence pair for sequence classification tasks.
+ An XLNet sequence pair has the following format: A [SEP] B [SEP][CLS]
"""
sep = [self.sep_token_id]