add option mentioned in #940
This commit is contained in:
@@ -247,6 +247,9 @@ def evaluate(args, model, tokenizer, prefix=""):
|
|||||||
|
|
||||||
|
|
||||||
def load_and_cache_examples(args, task, tokenizer, evaluate=False):
|
def load_and_cache_examples(args, task, tokenizer, evaluate=False):
|
||||||
|
if args.local_rank not in [-1, 0]:
|
||||||
|
torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache
|
||||||
|
|
||||||
processor = processors[task]()
|
processor = processors[task]()
|
||||||
output_mode = output_modes[task]
|
output_mode = output_modes[task]
|
||||||
# Load data features from cache or dataset file
|
# Load data features from cache or dataset file
|
||||||
@@ -273,6 +276,9 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
|
|||||||
logger.info("Saving features into cached file %s", cached_features_file)
|
logger.info("Saving features into cached file %s", cached_features_file)
|
||||||
torch.save(features, cached_features_file)
|
torch.save(features, cached_features_file)
|
||||||
|
|
||||||
|
if args.local_rank == 0:
|
||||||
|
torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache
|
||||||
|
|
||||||
# Convert to Tensors and build dataset
|
# Convert to Tensors and build dataset
|
||||||
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
|
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
|
||||||
all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
|
all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
|
||||||
|
|||||||
@@ -272,6 +272,9 @@ def evaluate(args, model, tokenizer, prefix=""):
|
|||||||
|
|
||||||
|
|
||||||
def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
|
def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
|
||||||
|
if args.local_rank not in [-1, 0]:
|
||||||
|
torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache
|
||||||
|
|
||||||
# Load data features from cache or dataset file
|
# Load data features from cache or dataset file
|
||||||
input_file = args.predict_file if evaluate else args.train_file
|
input_file = args.predict_file if evaluate else args.train_file
|
||||||
cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format(
|
cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format(
|
||||||
@@ -296,6 +299,9 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
|
|||||||
logger.info("Saving features into cached file %s", cached_features_file)
|
logger.info("Saving features into cached file %s", cached_features_file)
|
||||||
torch.save(features, cached_features_file)
|
torch.save(features, cached_features_file)
|
||||||
|
|
||||||
|
if args.local_rank == 0:
|
||||||
|
torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache
|
||||||
|
|
||||||
# Convert to Tensors and build dataset
|
# Convert to Tensors and build dataset
|
||||||
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
|
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
|
||||||
all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
|
all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
|
||||||
|
|||||||
Reference in New Issue
Block a user