docs: update link huggingface map (#26077)

This commit is contained in:
Phuc Van Phan
2023-09-11 18:57:04 +07:00
committed by GitHub
parent 7fd2d68613
commit 9cebae64ad
15 changed files with 15 additions and 15 deletions

View File

@@ -533,7 +533,7 @@ def main():
# to preprocess.
#
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
+ # https://huggingface.co/docs/datasets/process#map
with training_args.main_process_first(desc="grouping texts together"):
if not data_args.streaming:

View File

@@ -473,7 +473,7 @@ def main():
# to preprocess.
#
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
+ # https://huggingface.co/docs/datasets/process#map
with accelerator.main_process_first():
lm_datasets = tokenized_datasets.map(

View File

@@ -547,7 +547,7 @@ def main():
# might be slower to preprocess.
#
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
+ # https://huggingface.co/docs/datasets/process#map
with training_args.main_process_first(desc="grouping texts together"):
if not data_args.streaming:

View File

@@ -504,7 +504,7 @@ def main():
# might be slower to preprocess.
#
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
+ # https://huggingface.co/docs/datasets/process#map
with accelerator.main_process_first():
tokenized_datasets = tokenized_datasets.map(

View File

@@ -478,7 +478,7 @@ def main():
# might be slower to preprocess.
#
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
+ # https://huggingface.co/docs/datasets/process#map
with training_args.main_process_first(desc="grouping texts together"):
tokenized_datasets = tokenized_datasets.map(