Remove 50k limits bug
This commit is contained in:
committed by
Julien Chaumond
parent
6af5a54c28
commit
8e093e5981
@@ -90,7 +90,6 @@ class LineByLineTextDataset(Dataset):
         with open(file_path, encoding="utf-8") as f:
             lines = [line for line in f.read().splitlines() if (len(line) > 0 and not line.isspace())]
 
-        lines = lines[:50_000]
         batch_encoding = tokenizer.batch_encode_plus(lines, add_special_tokens=True, max_length=block_size)
         self.examples = batch_encoding["input_ids"]
 
Reference in New Issue
Block a user