Removed dataset limit (dropped the `[:20]` cap on the lines read from each wiki file)
This commit is contained in:
@@ -14,7 +14,7 @@ class WikiTextDataset(Dataset):
 
 with open(os.path.join(directory, f"wiki.{file}.raw"), encoding="utf-8") as f:
 text = f.read()
-spans = list(filter(lambda item: len(item) > 120, text.split("\n")[:20]))
+spans = list(filter(lambda item: len(item) > 120, text.split("\n")))
 
 for span in spans:
 span = tokenizer.encode(span)
Reference in New Issue
Block a user