chore: Fix typos in docs and examples (#36524)
Fix typos in docs and examples Signed-off-by: co63oc <co63oc@users.noreply.github.com>
This commit is contained in:
@@ -296,7 +296,7 @@ class DataCollatorForWav2Vec2Pretraining:
|
||||
The Wav2Vec2 model used for pretraining. The data collator needs to have access
|
||||
to config and ``_get_feat_extract_output_lengths`` function for correct padding.
|
||||
feature_extractor (:class:`~transformers.Wav2Vec2FeatureExtractor`):
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
@@ -445,7 +445,7 @@ def main():
|
||||
accelerator.wait_for_everyone()
|
||||
|
||||
# 1. Download and create train, validation dataset
|
||||
# We load all dataset configuration and datset split pairs passed in
|
||||
# We load all dataset configuration and dataset split pairs passed in
|
||||
# ``args.dataset_config_names`` and ``args.dataset_split_names``
|
||||
datasets_splits = []
|
||||
for dataset_config_name, train_split_name in zip(args.dataset_config_names, args.dataset_split_names):
|
||||
|
||||
@@ -292,7 +292,7 @@ class DataCollatorCTCWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor (:class:`~transformers.AutoProcessor`)
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
|
||||
@@ -275,7 +275,7 @@ class DataCollatorCTCWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor (:class:`~transformers.AutoProcessor`)
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
@@ -559,7 +559,7 @@ def main():
|
||||
)
|
||||
|
||||
# if we are doing adapter language training, save
|
||||
# vocab with adpter language
|
||||
# vocab with adapter language
|
||||
if data_args.target_language is not None:
|
||||
vocab_dict[data_args.target_language] = lang_dict
|
||||
|
||||
|
||||
@@ -429,7 +429,7 @@ def main():
|
||||
if is_regression:
|
||||
label_list = None
|
||||
num_labels = 1
|
||||
# regession requires float as label type, let's cast it if needed
|
||||
# regression requires float as label type, let's cast it if needed
|
||||
for split in raw_datasets.keys():
|
||||
if raw_datasets[split].features["label"].dtype not in ["float32", "float64"]:
|
||||
logger.warning(
|
||||
|
||||
@@ -19,7 +19,7 @@ limitations under the License.
|
||||
Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-generation/run_generation.py).
|
||||
|
||||
Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, GPT-J, Transformer-XL, XLNet, CTRL, BLOOM, LLAMA, OPT.
|
||||
A similar script is used for our official demo [Write With Transfomer](https://transformer.huggingface.co), where you
|
||||
A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you
|
||||
can try out the different models available in the library.
|
||||
|
||||
Example usage:
|
||||
|
||||
@@ -19,7 +19,7 @@ limitations under the License.
|
||||
## PyTorch version
|
||||
|
||||
Fine-tuning the library models for token classification tasks such as Named Entity Recognition (NER), Part-of-speech
|
||||
tagging (POS) or phrase extraction (CHUNKS). The main scrip `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
|
||||
tagging (POS) or phrase extraction (CHUNKS). The main script `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
|
||||
customize it to your needs if you need extra processing on your datasets.
|
||||
|
||||
It will either run on a dataset hosted on our [hub](https://huggingface.co/datasets) or with your own text files for
|
||||
|
||||
Reference in New Issue
Block a user