fix (#4419)

2020-05-18 15:51:40 +02:00
parent 31c799a0c9
commit a27c795908
2 changed files with 12 additions and 14 deletions
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -1236,13 +1236,15 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
            else:
                tokens_to_add = next_token

+            # add token and increase length by one
            input_ids = torch.cat([input_ids, tokens_to_add.unsqueeze(-1)], dim=-1)
+            cur_len = cur_len + 1

            if eos_token_id is not None:
                eos_in_sents = tokens_to_add == eos_token_id
                # if sentence is unfinished and the token to add is eos, sent_lengths is filled with current length
                is_sents_unfinished_and_token_to_add_is_eos = unfinished_sents.mul(eos_in_sents.long()).bool()
-                sent_lengths.masked_fill_(is_sents_unfinished_and_token_to_add_is_eos, cur_len + 1)
+                sent_lengths.masked_fill_(is_sents_unfinished_and_token_to_add_is_eos, cur_len)
                # unfinished_sents is set to zero if eos in sentence
                unfinished_sents.mul_((~eos_in_sents).long())

@@ -1256,8 +1258,6 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
                    [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
                )

-            cur_len = cur_len + 1
-
        # if there are different sentence lengths in the batch, some batches have to be padded
        if sent_lengths.min().item() != sent_lengths.max().item():
            assert pad_token_id is not None, "`Pad_token_id` has to be defined if batches have different lengths"
@@ -1473,9 +1473,11 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
            beam_tokens = input_ids.new([x[1] for x in next_batch_beam])
            beam_idx = input_ids.new([x[2] for x in next_batch_beam])

-            # re-order batch
+            # re-order batch and update current length
            input_ids = input_ids[beam_idx, :]
            input_ids = torch.cat([input_ids, beam_tokens.unsqueeze(1)], dim=-1)
+            cur_len = cur_len + 1
+
            # re-order internal states
            if past is not None:
                past = self._reorder_cache(past, beam_idx)
@@ -1486,9 +1488,6 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
                    [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
                )

-            # update current length
-            cur_len = cur_len + 1
-
        # finalize all open beam hypotheses and add to generated hypotheses
        for batch_idx in range(batch_size):
            if done[batch_idx]: