[M2M100] fix positional embeddings (#10590)

* fix tests

* emb should be a parameter

* fix positional embeddings

* fix make_weights

* don't save pos embeds

* add comment to describe the clamping
This commit is contained in:
Suraj Patil
2021-03-08 16:06:19 +05:30
committed by GitHub
parent d59464db6b
commit 2a737bffef
2 changed files with 29 additions and 9 deletions

View File

@@ -96,13 +96,19 @@ class M2M100ModelTester:
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
3,
)
input_ids[:, -1] = self.eos_token_id # Eos Token
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
# we need to clamp the input ids here to avoid having pad token in between
# this is because for M2M100 the position_ids are prepared such that
# all pad tokens have pos id = 2 and rest are between 2..seq_length
# and the seq_length here is seq_length - num_pad_tokens
# but when using past, there is no way of knowing if the past input ids had
# pad tokens in them, which results in an incorrect seq_length, which in turn results in
# position_ids being off by num_pad_tokens in past input
input_ids = input_ids.clamp(self.pad_token_id + 1)
decoder_input_ids = decoder_input_ids.clamp(self.pad_token_id + 1)
config = M2M100Config(
vocab_size=self.vocab_size,
d_model=self.hidden_size,