From 53c8f700f4704a58f4684674ced1c57d6ca9240c Mon Sep 17 00:00:00 2001 From: thomwolf Date: Tue, 20 Aug 2019 11:29:26 +0200 Subject: [PATCH] fix #808 --- pytorch_transformers/modeling_bert.py | 5 ++++- pytorch_transformers/modeling_gpt2.py | 2 ++ pytorch_transformers/modeling_openai.py | 2 ++ pytorch_transformers/modeling_roberta.py | 4 ++++ pytorch_transformers/modeling_transfo_xl.py | 2 ++ pytorch_transformers/modeling_xlm.py | 4 ++++ pytorch_transformers/modeling_xlnet.py | 2 ++ 7 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pytorch_transformers/modeling_bert.py b/pytorch_transformers/modeling_bert.py index 9c20eac9bf..7b34b3fd90 100644 --- a/pytorch_transformers/modeling_bert.py +++ b/pytorch_transformers/modeling_bert.py @@ -599,7 +599,10 @@ BERT_INPUTS_DOCSTRING = r""" ``tokens: [CLS] the dog is hairy . [SEP]`` ``token_type_ids: 0 0 0 0 0 0 0`` - + + Bert is a model with absolute position embeddings so it's usually advised to pad the inputs on + the right rather than the left. + Indices can be obtained using :class:`pytorch_transformers.BertTokenizer`. See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details. diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py index f67d0e88d5..91d01d0584 100644 --- a/pytorch_transformers/modeling_gpt2.py +++ b/pytorch_transformers/modeling_gpt2.py @@ -390,6 +390,8 @@ GPT2_START_DOCSTRING = r""" OpenAI GPT-2 model was proposed in GPT2_INPUTS_DOCSTRING = r""" Inputs: **input_ids**: ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Indices of input sequence tokens in the vocabulary. + GPT-2 is a model with absolute position embeddings so it's usually advised to pad the inputs on + the right rather than the left. Indices can be obtained using :class:`pytorch_transformers.BPT2Tokenizer`. See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details. diff --git a/pytorch_transformers/modeling_openai.py b/pytorch_transformers/modeling_openai.py index e8648487be..71ffb78e0f 100644 --- a/pytorch_transformers/modeling_openai.py +++ b/pytorch_transformers/modeling_openai.py @@ -404,6 +404,8 @@ OPENAI_GPT_START_DOCSTRING = r""" OpenAI GPT model was proposed in OPENAI_GPT_INPUTS_DOCSTRING = r""" Inputs: **input_ids**: ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Indices of input sequence tokens in the vocabulary. + GPT is a model with absolute position embeddings so it's usually advised to pad the inputs on + the right rather than the left. Indices can be obtained using :class:`pytorch_transformers.BPT2Tokenizer`. See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details. diff --git a/pytorch_transformers/modeling_roberta.py b/pytorch_transformers/modeling_roberta.py index e3065cf60b..e49b2a06b1 100644 --- a/pytorch_transformers/modeling_roberta.py +++ b/pytorch_transformers/modeling_roberta.py @@ -110,6 +110,10 @@ ROBERTA_INPUTS_DOCSTRING = r""" Fully encoded sequences or sequence pairs can be obtained using the RobertaTokenizer.encode function with the ``add_special_tokens`` parameter set to ``True``. + + RoBERTa is a model with absolute position embeddings so it's usually advised to pad the inputs on + the right rather than the left. + See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details. **position_ids**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: diff --git a/pytorch_transformers/modeling_transfo_xl.py b/pytorch_transformers/modeling_transfo_xl.py index 553a71fffe..3cfdee38cb 100644 --- a/pytorch_transformers/modeling_transfo_xl.py +++ b/pytorch_transformers/modeling_transfo_xl.py @@ -936,6 +936,8 @@ TRANSFO_XL_INPUTS_DOCSTRING = r""" Inputs: **input_ids**: ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Indices of input sequence tokens in the vocabulary. + Transformer-XL is a model with relative position embeddings so you can either pad the inputs on + the right or on the left. Indices can be obtained using :class:`pytorch_transformers.TransfoXLTokenizer`. See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details. diff --git a/pytorch_transformers/modeling_xlm.py b/pytorch_transformers/modeling_xlm.py index d01d245bbb..be2767ed0c 100644 --- a/pytorch_transformers/modeling_xlm.py +++ b/pytorch_transformers/modeling_xlm.py @@ -424,6 +424,10 @@ XLM_INPUTS_DOCSTRING = r""" Inputs: **input_ids**: ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Indices of input sequence tokens in the vocabulary. + + XLM is a model with absolute position embeddings so it's usually advised to pad the inputs on + the right rather than the left. + Indices can be obtained using :class:`pytorch_transformers.XLMTokenizer`. See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details. diff --git a/pytorch_transformers/modeling_xlnet.py b/pytorch_transformers/modeling_xlnet.py index af33c5a6c2..d44821788e 100644 --- a/pytorch_transformers/modeling_xlnet.py +++ b/pytorch_transformers/modeling_xlnet.py @@ -655,6 +655,8 @@ XLNET_INPUTS_DOCSTRING = r""" Inputs: **input_ids**: ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Indices of input sequence tokens in the vocabulary. + XLNet is a model with relative position embeddings so you can either pad the inputs on + the right or on the left. Indices can be obtained using :class:`pytorch_transformers.XLNetTokenizer`. See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details.