Fix group_lengths for short datasets (#12558)
This commit is contained in:
@@ -431,7 +431,8 @@ if __name__ == "__main__":
 total_length = len(concatenated_examples[list(examples.keys())[0]])
 # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
 # customize this part to your needs.
-total_length = (total_length // max_seq_length) * max_seq_length
+if total_length >= max_seq_length:
+    total_length = (total_length // max_seq_length) * max_seq_length
 # Split by chunks of max_len.
 result = {
     k: [t[i : i + max_seq_length] for i in range(0, total_length, max_seq_length)]
||||
Reference in New Issue
Block a user