[M2M100] fix positional embeddings (#10590)
* fix tests
* emb should be a parameter
* fix positional embeddings
* fix make_weights
* don't save pos embeds
* add comment to describe the clamping
This commit is contained in:
@@ -96,13 +96,19 @@ class M2M100ModelTester:
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
|
||||
3,
|
||||
)
|
||||
input_ids[:, -1] = self.eos_token_id # Eos Token
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
# we need to clamp the input ids here to avoid having pad token in between
|
||||
# this is because for M2M100 the position_ids are prepared such that
|
||||
# all pad tokens have pos id = 2 and rest are between 2..seq_length
|
||||
# and the seq_length here is seq_length - num_pad_tokens
|
||||
# but when using past, there is no way of knowing if the past input ids had
|
||||
# pad tokens in them, which results in incorrect seq_length and which in turn results in
|
||||
# position_ids being off by num_pad_tokens in past input
|
||||
input_ids = input_ids.clamp(self.pad_token_id + 1)
|
||||
decoder_input_ids = decoder_input_ids.clamp(self.pad_token_id + 1)
|
||||
|
||||
config = M2M100Config(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
|
||||
Reference in New Issue
Block a user