From 8733ffcb5e660e81a353b00897f350399d7d5fb1 Mon Sep 17 00:00:00 2001 From: Matthew Carrigan Date: Thu, 21 Mar 2019 14:09:57 +0000 Subject: [PATCH] Removing a couple of other old unnecessary comments --- examples/lm_finetuning/pregenerate_training_data.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/lm_finetuning/pregenerate_training_data.py b/examples/lm_finetuning/pregenerate_training_data.py index 03fb2a763f..ac7a1ae076 100644 --- a/examples/lm_finetuning/pregenerate_training_data.py +++ b/examples/lm_finetuning/pregenerate_training_data.py @@ -1,7 +1,3 @@ -# Step 1: Slurp the dataset up, tokenize each sentence, and store as docs -> sentences -> tokens -# Step 2: Walk over the dataset, using the Google BERT logic to concatenate sentences into training examples -# Step 3: Write out the examples, possibly as Torch tensors? - from argparse import ArgumentParser from pathlib import Path from tqdm import tqdm, trange