docs: update link huggingface map (#26077)
This commit is contained in:
@@ -684,7 +684,7 @@ def main():
|
||||
# might be slower to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
tokenized_datasets = tokenized_datasets.map(
|
||||
group_texts,
|
||||
batched=True,
|
||||
|
||||
@@ -607,7 +607,7 @@ def main():
|
||||
# to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
lm_datasets = tokenized_datasets.map(
|
||||
group_texts,
|
||||
|
||||
@@ -625,7 +625,7 @@ def main():
|
||||
# might be slower to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
tokenized_datasets = tokenized_datasets.map(
|
||||
group_texts,
|
||||
batched=True,
|
||||
|
||||
@@ -715,7 +715,7 @@ def main():
|
||||
# might be slower to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
tokenized_datasets = tokenized_datasets.map(
|
||||
group_texts,
|
||||
batched=True,
|
||||
|
||||
@@ -533,7 +533,7 @@ def main():
|
||||
# to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
with training_args.main_process_first(desc="grouping texts together"):
|
||||
if not data_args.streaming:
|
||||
|
||||
@@ -473,7 +473,7 @@ def main():
|
||||
# to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
with accelerator.main_process_first():
|
||||
lm_datasets = tokenized_datasets.map(
|
||||
|
||||
@@ -547,7 +547,7 @@ def main():
|
||||
# might be slower to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
with training_args.main_process_first(desc="grouping texts together"):
|
||||
if not data_args.streaming:
|
||||
|
||||
@@ -504,7 +504,7 @@ def main():
|
||||
# might be slower to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
with accelerator.main_process_first():
|
||||
tokenized_datasets = tokenized_datasets.map(
|
||||
|
||||
@@ -478,7 +478,7 @@ def main():
|
||||
# might be slower to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
with training_args.main_process_first(desc="grouping texts together"):
|
||||
tokenized_datasets = tokenized_datasets.map(
|
||||
|
||||
@@ -395,7 +395,7 @@ def main():
|
||||
# to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
lm_datasets = tokenized_datasets.map(
|
||||
group_texts,
|
||||
|
||||
@@ -459,7 +459,7 @@ def main():
|
||||
# to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
lm_datasets = tokenized_datasets.map(
|
||||
group_texts,
|
||||
|
||||
@@ -474,7 +474,7 @@ def main():
|
||||
# might be slower to preprocess.
|
||||
#
|
||||
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
|
||||
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
|
||||
+ # https://huggingface.co/docs/datasets/process#map
|
||||
|
||||
tokenized_datasets = tokenized_datasets.map(
|
||||
group_texts,
|
||||
|
||||
Reference in New Issue
Block a user