From 3401980fc4446d556ca075f6a627bdee33684341 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Mon, 9 Sep 2019 10:22:12 +0300 Subject: [PATCH] fix #1208 --- pytorch_transformers/modeling_xlnet.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_transformers/modeling_xlnet.py b/pytorch_transformers/modeling_xlnet.py index 3ac791b121..d8744e95e7 100644 --- a/pytorch_transformers/modeling_xlnet.py +++ b/pytorch_transformers/modeling_xlnet.py @@ -504,8 +504,10 @@ XLNET_INPUTS_DOCSTRING = r""" :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details. **token_type_ids**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: A parallel sequence of tokens (can be used to indicate various portions of the inputs). - The embeddings from these tokens will be summed with the respective token embeddings. - Indices are selected in the vocabulary (unlike BERT which has a specific vocabulary for segment indices). + The type indices in XLNet are NOT selected in the vocabulary; they can be arbitrary numbers and + the important thing is that they should be different for tokens which belong to different segments. + The model will compute relative segment differences from the given type indices: + 0 if the segment ids of two tokens are the same, 1 if not. **attention_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(batch_size, sequence_length)``: Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: