Broken links fixed related to datasets docs (#27569)
Fixed the broken links that belong to the Datasets library docs referenced in transformers.
This commit is contained in:
@@ -340,7 +340,7 @@ def main():
|
||||
token=model_args.token,
|
||||
)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# 5. Load pretrained model, tokenizer, and image processor
|
||||
if model_args.tokenizer_name:
|
||||
|
||||
@@ -388,7 +388,7 @@ def main():
|
||||
)
|
||||
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -368,7 +368,7 @@ def main():
|
||||
)
|
||||
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -382,7 +382,7 @@ def main():
|
||||
)
|
||||
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -371,7 +371,7 @@ def main():
|
||||
)
|
||||
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -352,7 +352,7 @@ def main():
|
||||
)
|
||||
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -329,7 +329,7 @@ def main():
|
||||
token=model_args.token,
|
||||
)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
|
||||
|
||||
@@ -366,7 +366,7 @@ def main():
|
||||
for split in raw_datasets.keys():
|
||||
raw_datasets[split] = raw_datasets[split].select(range(100))
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
if raw_datasets["train"] is not None:
|
||||
column_names = raw_datasets["train"].column_names
|
||||
|
||||
@@ -337,7 +337,7 @@ def main():
|
||||
token=model_args.token,
|
||||
)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -325,7 +325,7 @@ def main():
|
||||
token=model_args.token,
|
||||
)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -369,7 +369,7 @@ def main():
|
||||
extension = args.train_file.split(".")[-1]
|
||||
raw_datasets = load_dataset(extension, data_files=data_files, field="data")
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -417,7 +417,7 @@ def main():
|
||||
extension = args.train_file.split(".")[-1]
|
||||
raw_datasets = load_dataset(extension, data_files=data_files, field="data")
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -382,7 +382,7 @@ def main():
|
||||
token=model_args.token,
|
||||
)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -134,7 +134,7 @@ of **0.36**.
|
||||
|
||||
### Multi GPU CTC with Dataset Streaming
|
||||
|
||||
The following command shows how to use [Dataset Streaming mode](https://huggingface.co/docs/datasets/dataset_streaming.html)
|
||||
The following command shows how to use [Dataset Streaming mode](https://huggingface.co/docs/datasets/dataset_streaming)
|
||||
to fine-tune [XLS-R](https://huggingface.co/transformers/main/model_doc/xls_r.html)
|
||||
on [Common Voice](https://huggingface.co/datasets/common_voice) using 4 GPUs in half-precision.
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ For the old `finetune_trainer.py` and related utils, see [`examples/legacy/seq2s
|
||||
|
||||
`run_summarization.py` is a lightweight example of how to download and preprocess a dataset from the [🤗 Datasets](https://github.com/huggingface/datasets) library or use your own files (jsonlines or csv), then fine-tune one of the architectures above on it.
|
||||
|
||||
For custom datasets in `jsonlines` format please see: https://huggingface.co/docs/datasets/loading_datasets.html#json-files
|
||||
For custom datasets in `jsonlines` format please see: https://huggingface.co/docs/datasets/loading_datasets#json-files
|
||||
and you will also find examples of these below.
|
||||
|
||||
## With Trainer
|
||||
|
||||
@@ -432,7 +432,7 @@ def main():
|
||||
token=model_args.token,
|
||||
)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -409,7 +409,7 @@ def main():
|
||||
extension = args.train_file.split(".")[-1]
|
||||
raw_datasets = load_dataset(extension, data_files=data_files)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
@@ -396,7 +396,7 @@ def main():
|
||||
)
|
||||
|
||||
# See more about loading any type of standard or custom dataset at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
if data_args.remove_splits is not None:
|
||||
for split in data_args.remove_splits.split(","):
|
||||
|
||||
@@ -355,7 +355,7 @@ def main():
|
||||
token=model_args.token,
|
||||
)
|
||||
# See more about loading any type of standard or custom dataset at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Labels
|
||||
if data_args.task_name is not None:
|
||||
@@ -372,7 +372,7 @@ def main():
|
||||
num_labels = 1
|
||||
else:
|
||||
# A useful fast method:
|
||||
# https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique
|
||||
# https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.Dataset.unique
|
||||
label_list = raw_datasets["train"].unique("label")
|
||||
label_list.sort() # Let's sort it for determinism
|
||||
num_labels = len(label_list)
|
||||
|
||||
@@ -293,7 +293,7 @@ def main():
|
||||
extension = (args.train_file if args.train_file is not None else args.validation_file).split(".")[-1]
|
||||
raw_datasets = load_dataset(extension, data_files=data_files)
|
||||
# See more about loading any type of standard or custom dataset at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Labels
|
||||
if args.task_name is not None:
|
||||
|
||||
@@ -318,7 +318,7 @@ def main():
|
||||
extension = data_args.train_file.split(".")[-1]
|
||||
raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
if training_args.do_train:
|
||||
column_names = raw_datasets["train"].column_names
|
||||
|
||||
@@ -348,7 +348,7 @@ def main():
|
||||
for split in raw_datasets.keys():
|
||||
raw_datasets[split] = raw_datasets[split].select(range(100))
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
if raw_datasets["train"] is not None:
|
||||
column_names = raw_datasets["train"].column_names
|
||||
|
||||
@@ -33,7 +33,7 @@ For the old `finetune_trainer.py` and related utils, see [`examples/legacy/seq2s
|
||||
|
||||
`run_translation.py` is a lightweight example of how to download and preprocess a dataset from the [🤗 Datasets](https://github.com/huggingface/datasets) library or use your own files (jsonlines or csv), then fine-tune one of the architectures above on it.
|
||||
|
||||
For custom datasets in `jsonlines` format please see: https://huggingface.co/docs/datasets/loading_datasets.html#json-files
|
||||
For custom datasets in `jsonlines` format please see: https://huggingface.co/docs/datasets/loading_datasets#json-files
|
||||
and you will also find examples of these below.
|
||||
|
||||
|
||||
|
||||
@@ -389,7 +389,7 @@ def main():
|
||||
extension = args.train_file.split(".")[-1]
|
||||
raw_datasets = load_dataset(extension, data_files=data_files)
|
||||
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.html.
|
||||
# https://huggingface.co/docs/datasets/loading_datasets.
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
|
||||
Reference in New Issue
Block a user