Merge pull request #2130 from huggingface/ignored-index-coherence
[BREAKING CHANGE] Setting all ignored index to the PyTorch standard
This commit is contained in:
@@ -364,7 +364,7 @@ class XxxForMaskedLM(XxxPreTrainedModel):
|
||||
**masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
||||
Labels for computing the masked language modeling loss.
|
||||
Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
|
||||
Tokens with indices set to ``-1`` are ignored (masked), the loss is only computed for the tokens with labels
|
||||
Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels
|
||||
in ``[0, ..., config.vocab_size]``
|
||||
|
||||
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||
@@ -415,7 +415,7 @@ class XxxForMaskedLM(XxxPreTrainedModel):
|
||||
|
||||
outputs = (prediction_scores,) + outputs[2:] # Add hidden states and attention if they are here
|
||||
if masked_lm_labels is not None:
|
||||
loss_fct = CrossEntropyLoss(ignore_index=-1)
|
||||
loss_fct = CrossEntropyLoss()
|
||||
masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
|
||||
outputs = (masked_lm_loss,) + outputs
|
||||
|
||||
|
||||
Reference in New Issue
Block a user