From cdf4cd7068d1ccd85e94457860007097bf84b08e Mon Sep 17 00:00:00 2001
From: Tomo Lazovich
Date: Fri, 10 Jul 2020 16:34:21 -0400
Subject: [PATCH] [squad] add version tag to squad cache (#5669)

---
 src/transformers/data/datasets/squad.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/transformers/data/datasets/squad.py b/src/transformers/data/datasets/squad.py
index 7e2028c25a..1cbc305d4a 100644
--- a/src/transformers/data/datasets/squad.py
+++ b/src/transformers/data/datasets/squad.py
@@ -113,9 +113,12 @@ class SquadDataset(Dataset):
                 raise KeyError("mode is not a valid split name")
         self.mode = mode
         # Load data features from cache or dataset file
+        version_tag = "v2" if args.version_2_with_negative else "v1"
         cached_features_file = os.path.join(
             cache_dir if cache_dir is not None else args.data_dir,
-            "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(args.max_seq_length),),
+            "cached_{}_{}_{}_{}".format(
+                mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), version_tag,
+            ),
         )
 
         # Make sure only the first process in distributed training processes the dataset,