Removed dataset limit

This commit is contained in:
LysandreJik
2019-08-06 14:57:07 -04:00
parent 3e3e145497
commit 5c18825a18

View File

@@ -14,7 +14,7 @@ class WikiTextDataset(Dataset):
with open(os.path.join(directory, f"wiki.{file}.raw"), encoding="utf-8") as f:
text = f.read()
-spans = list(filter(lambda item: len(item) > 120, text.split("\n")[:20]))
+spans = list(filter(lambda item: len(item) > 120, text.split("\n")))
for span in spans:
span = tokenizer.encode(span)