From 569da80cedcf9022b869fbb398666c93b3219956 Mon Sep 17 00:00:00 2001 From: Romain Keramitas Date: Tue, 7 Jan 2020 10:09:56 +0100 Subject: [PATCH] Make doc regarding masked indices more clear. Signed-off-by: Romain Keramitas --- src/transformers/modeling_albert.py | 2 +- src/transformers/modeling_bert.py | 6 +++--- src/transformers/modeling_camembert.py | 2 +- src/transformers/modeling_ctrl.py | 2 +- src/transformers/modeling_distilbert.py | 2 +- src/transformers/modeling_gpt2.py | 2 +- src/transformers/modeling_openai.py | 4 ++-- src/transformers/modeling_roberta.py | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index dea35e695e..c1540bda5f 100644 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -597,7 +597,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel): r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss. - Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py index 49f67e4416..cdc46b9662 100644 --- a/src/transformers/modeling_bert.py +++ b/src/transformers/modeling_bert.py @@ -826,7 +826,7 @@ class BertForPreTraining(BertPreTrainedModel): r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss. - Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` **next_sentence_label**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``: @@ -916,12 +916,12 @@ class BertForMaskedLM(BertPreTrainedModel): r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss. - Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the left-to-right language modeling loss (next word prediction). - Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` diff --git a/src/transformers/modeling_camembert.py b/src/transformers/modeling_camembert.py index b16df3f9b9..720d4a1f60 100644 --- a/src/transformers/modeling_camembert.py +++ b/src/transformers/modeling_camembert.py @@ -167,7 +167,7 @@ class CamembertForMaskedLM(RobertaForMaskedLM): r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss. - Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` diff --git a/src/transformers/modeling_ctrl.py b/src/transformers/modeling_ctrl.py index e2aebed8c8..03e73c3311 100644 --- a/src/transformers/modeling_ctrl.py +++ b/src/transformers/modeling_ctrl.py @@ -444,7 +444,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel): **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set ``lm_labels = input_ids`` - Indices are selected in ``[-1, 0, ..., config.vocab_size]`` + Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` diff --git a/src/transformers/modeling_distilbert.py b/src/transformers/modeling_distilbert.py index 0563dcf852..475992d382 100644 --- a/src/transformers/modeling_distilbert.py +++ b/src/transformers/modeling_distilbert.py @@ -496,7 +496,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel): r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss. - Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` diff --git a/src/transformers/modeling_gpt2.py b/src/transformers/modeling_gpt2.py index 54db1cc067..679100b84d 100644 --- a/src/transformers/modeling_gpt2.py +++ b/src/transformers/modeling_gpt2.py @@ -513,7 +513,7 @@ class GPT2LMHeadModel(GPT2PreTrainedModel): **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set ``lm_labels = input_ids`` - Indices are selected in ``[-1, 0, ..., config.vocab_size]`` + Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` diff --git a/src/transformers/modeling_openai.py b/src/transformers/modeling_openai.py index d8c5abc92e..ec0f09d158 100644 --- a/src/transformers/modeling_openai.py +++ b/src/transformers/modeling_openai.py @@ -490,7 +490,7 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel): **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set ``labels = input_ids`` - Indices are selected in ``[-1, 0, ..., config.vocab_size]`` + Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` @@ -578,7 +578,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set ``lm_labels = input_ids`` - Indices are selected in ``[-1, 0, ..., config.vocab_size]`` + Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` **mc_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size)``: diff --git a/src/transformers/modeling_roberta.py b/src/transformers/modeling_roberta.py index 8958d30a22..56e983e01c 100644 --- a/src/transformers/modeling_roberta.py +++ b/src/transformers/modeling_roberta.py @@ -223,7 +223,7 @@ class RobertaForMaskedLM(BertPreTrainedModel): r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss. - Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) + Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``