add draft version of proposed changes for ROUGE score

2020-03-09 00:33:12 +01:00
parent a5751f7578
commit 41b437ea3a
2 changed files with 14 additions and 9 deletions
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -846,7 +846,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
            encoder_inputs = input_ids
            input_ids = torch.full(
                (effective_batch_size * num_beams, 1),
-                eos_token_id,  # TODO (PVP): to check if this is the only solution -> quite hacky to do this
+                #                eos_token_id,  # TODO (PVP): to check if this is the only solution -> quite hacky to do this
+                bos_token_id,
                dtype=torch.long,
                device=next(self.parameters()).device,
            )
@@ -1079,10 +1080,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
                next_token_logits = next_token_logits / temperature

            scores = F.log_softmax(next_token_logits, dim=-1)  # (batch_size * num_beams, vocab_size)
-            if (
-                self.config.is_encoder_decoder and do_sample is False
-            ):  # TODO(PVP) to be refactored later - do we need this boolean flag here? Also Only add for beam_search or also for no_beam_search? The prepare scores fn is ugly here
-                scores = self.prepare_scores_for_generation(scores, cur_len, max_length)
+            #            if (
+            #                self.config.is_encoder_decoder and do_sample is False
+            #            ):  # TODO(PVP) to be refactored later - do we need this boolean flag here? Also Only add for beam_search or also for no_beam_search? The prepare scores fn is ugly here
+            #                scores = self.prepare_scores_for_generation(scores, cur_len, max_length)

            # set eos token prob to zero if min_length is not reached
            if eos_token_ids is not None and cur_len < min_length:
@@ -1271,9 +1272,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
            assert (len(hypo) == max_length for hypo in best)
            decoded = torch.stack(best).type(torch.long).to(next(self.parameters()).device)

-        if self.config.is_encoder_decoder:
-            # do not return first <EOS> token
-            return decoded[:, 1:]
+        #        if self.config.is_encoder_decoder:
+        # do not return first <EOS> token
+        #            return decoded[:, 1:]
        return decoded

    # force one of token_ids to be generated by setting prob of all other tokens to 0.