chore: Fix typos in docs and examples (#36524)

Fix typos in docs and examples

Signed-off-by: co63oc <co63oc@users.noreply.github.com>
This commit is contained in:
co63oc
2025-03-04 21:47:41 +08:00
committed by GitHub
parent 84f0186e89
commit 37508816d6
38 changed files with 50 additions and 50 deletions

View File

@@ -265,7 +265,7 @@ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
Data collator that will dynamically pad the inputs received.
Args:
processor ([`Wav2Vec2Processor`])
The processor used for proccessing the data.
The processor used for processing the data.
decoder_start_token_id (:obj:`int`)
The begin-of-sentence of the decoder.
input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):

View File

@@ -296,7 +296,7 @@ class DataCollatorForWav2Vec2Pretraining:
The Wav2Vec2 model used for pretraining. The data collator needs to have access
to config and ``_get_feat_extract_output_lengths`` function for correct padding.
feature_extractor (:class:`~transformers.Wav2Vec2FeatureExtractor`):
The processor used for proccessing the data.
The processor used for processing the data.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:
@@ -445,7 +445,7 @@ def main():
accelerator.wait_for_everyone()
# 1. Download and create train, validation dataset
# We load all dataset configuration and datset split pairs passed in
# We load all dataset configuration and dataset split pairs passed in
# ``args.dataset_config_names`` and ``args.dataset_split_names``
datasets_splits = []
for dataset_config_name, train_split_name in zip(args.dataset_config_names, args.dataset_split_names):

View File

@@ -292,7 +292,7 @@ class DataCollatorCTCWithPadding:
Data collator that will dynamically pad the inputs received.
Args:
processor (:class:`~transformers.AutoProcessor`)
The processor used for proccessing the data.
The processor used for processing the data.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:

View File

@@ -275,7 +275,7 @@ class DataCollatorCTCWithPadding:
Data collator that will dynamically pad the inputs received.
Args:
processor (:class:`~transformers.AutoProcessor`)
The processor used for proccessing the data.
The processor used for processing the data.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:
@@ -559,7 +559,7 @@ def main():
)
# if we are doing adapter language training, save
# vocab with adpter language
# vocab with adapter language
if data_args.target_language is not None:
vocab_dict[data_args.target_language] = lang_dict

View File

@@ -429,7 +429,7 @@ def main():
if is_regression:
label_list = None
num_labels = 1
# regession requires float as label type, let's cast it if needed
# regression requires float as label type, let's cast it if needed
for split in raw_datasets.keys():
if raw_datasets[split].features["label"].dtype not in ["float32", "float64"]:
logger.warning(

View File

@@ -19,7 +19,7 @@ limitations under the License.
Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-generation/run_generation.py).
Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, GPT-J, Transformer-XL, XLNet, CTRL, BLOOM, LLAMA, OPT.
A similar script is used for our official demo [Write With Transfomer](https://transformer.huggingface.co), where you
A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you
can try out the different models available in the library.
Example usage:

View File

@@ -19,7 +19,7 @@ limitations under the License.
## PyTorch version
Fine-tuning the library models for token classification task such as Named Entity Recognition (NER), Parts-of-speech
tagging (POS) or phrase extraction (CHUNKS). The main scrip `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
tagging (POS) or phrase extraction (CHUNKS). The main script `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
customize it to your needs if you need extra processing on your datasets.
It will either run on a dataset hosted on our [hub](https://huggingface.co/datasets) or with your own text files for

View File

@@ -37,7 +37,7 @@ class BertAbsConfig(PretrainedConfig):
max_pos: int
The maximum sequence length that this model will be used with.
enc_layer: int
The numner of hidden layers in the Transformer encoder.
The number of hidden layers in the Transformer encoder.
enc_hidden_size: int
The size of the encoder's layers.
enc_heads: int
@@ -49,7 +49,7 @@ class BertAbsConfig(PretrainedConfig):
embeddings, layers, pooler and also the attention probabilities in
the encoder.
dec_layer: int
The numner of hidden layers in the decoder.
The number of hidden layers in the decoder.
dec_hidden_size: int
The size of the decoder's layers.
dec_heads: int

View File

@@ -130,7 +130,7 @@ def convert_bertabs_checkpoints(path_to_checkpoints, dump_path):
mask_tgt = decoder_attention_mask = None
mask_cls = None
# The original model does not apply the geneator layer immediatly but rather in
# The original model does not apply the generator layer immediately but rather in
# the beam search (where it combines softmax + linear layer). Since we already
# apply the softmax in our generation process we only apply the linear layer here.
# We make sure that the outputs of the full stack are identical
@@ -143,9 +143,9 @@ def convert_bertabs_checkpoints(path_to_checkpoints, dump_path):
output_converted_generator = new_model.generator(output_converted_model)
maximum_absolute_difference = torch.max(torch.abs(output_converted_model - output_original_model)).item()
print("Maximum absolute difference beween weights: {:.2f}".format(maximum_absolute_difference))
print("Maximum absolute difference between weights: {:.2f}".format(maximum_absolute_difference))
maximum_absolute_difference = torch.max(torch.abs(output_converted_generator - output_original_generator)).item()
print("Maximum absolute difference beween weights: {:.2f}".format(maximum_absolute_difference))
print("Maximum absolute difference between weights: {:.2f}".format(maximum_absolute_difference))
are_identical = torch.allclose(output_converted_model, output_original_model, atol=1e-3)
if are_identical:

View File

@@ -390,7 +390,7 @@ class MultiHeadedAttention(nn.Module):
:cite:`DBLP:journals/corr/VaswaniSPUJGKP17`.
Similar to standard `dot` attention but uses
multiple attention distributions simulataneously
multiple attention distributions simultaneously
to select relevant items.
.. mermaid::

View File

@@ -260,7 +260,7 @@ def main():
default=None,
type=str,
required=False,
help="The folder in wich the summaries should be written. Defaults to the folder where the documents are",
help="The folder in which the summaries should be written. Defaults to the folder where the documents are",
)
parser.add_argument(
"--compute_rouge",
@@ -315,7 +315,7 @@ def main():
)
args = parser.parse_args()
# Select device (distibuted not available)
# Select device (distributed not available)
args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
# Check the existence of directories

View File

@@ -24,7 +24,7 @@ class ConstantLengthDataset(IterableDataset):
"""
Iterable dataset that returns constant length chunks of tokens from stream of text files.
Args:
tokenizer (Tokenizer): The processor used for proccessing the data.
tokenizer (Tokenizer): The processor used for processing the data.
dataset (dataset.Dataset): Dataset with text files.
infinite (bool): If True the iterator is reset after dataset reaches end else stops.
seq_length (int): Length of token sequences to return.

View File

@@ -84,7 +84,7 @@ def is_config_or_test(example, scan_width=5, coeff=0.05):
def has_no_keywords(example):
"""Check if a python file has none of the keywords for: funcion, class, for loop, while loop."""
"""Check if a python file has none of the keywords for: function, class, for loop, while loop."""
keywords = ["def ", "class ", "for ", "while "]
lines = example["content"].splitlines()
for line in lines:

View File

@@ -252,7 +252,7 @@ def make_fast_generalized_attention(
unidirectional=False,
lax_scan_unroll=1,
):
"""Construct a fast generalized attention menthod."""
"""Construct a fast generalized attention method."""
logging.info("Fast generalized attention.: %s features and renormalize=%s", nb_features, renormalize_attention)
if features_type == "ortho":
matrix_creator = functools.partial(GaussianOrthogonalRandomMatrix, nb_features, qkv_dim, scaling=False)

View File

@@ -11,7 +11,7 @@ Please read the [accompanying blog post](https://shamanesiri.medium.com/how-to-f
The original RAG code has also been modified to work with the latest versions of pytorch lightning (version 1.2.10) and RAY (version 1.3.0). All other implementation details remain the same as the [original RAG code](https://github.com/huggingface/transformers/tree/main/examples/research_projects/rag).
Read more about RAG at https://arxiv.org/abs/2005.11401.
This code can be modified to experiment with other research on retrival augmented models which include training of the retriever (e.g. [REALM](https://arxiv.org/abs/2002.08909) and [MARGE](https://arxiv.org/abs/2006.15020)).
This code can be modified to experiment with other research on retrieval augmented models which include training of the retriever (e.g. [REALM](https://arxiv.org/abs/2002.08909) and [MARGE](https://arxiv.org/abs/2006.15020)).
To start training, use the bash script (finetune_rag_ray_end2end.sh) in this folder. This script also includes descriptions on each command-line argument used.

View File

@@ -134,7 +134,7 @@ class BaseTransformer(pl.LightningModule):
{
"params": [
p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)
], # check this named paramters
], # check this named parameters
"weight_decay": self.hparams.weight_decay,
},
{
@@ -279,7 +279,7 @@ class InitCallback(pl.Callback):
class CheckParamCallback(pl.Callback):
# check whether new added model paramters are differentiable
# check whether newly added model parameters are differentiable
def on_after_backward(self, trainer, pl_module):
# print(pl_module.model.rag)
for name, param in pl_module.model.rag.named_parameters():

View File

@@ -98,7 +98,7 @@ Our evaluation script enables two modes of evaluation (controlled by the `eval_m
The evaluation script expects paths to two files:
- `evaluation_set` - a path to a file specifying the evaluation dataset, a single input per line.
- `gold_data_path` - a path to a file contaning ground truth answers for datapoints from the `evaluation_set`, a single output per line. Check below for expected formats of the gold data files.
- `gold_data_path` - a path to a file containing ground truth answers for datapoints from the `evaluation_set`, a single output per line. Check below for expected formats of the gold data files.
## Retrieval evaluation

View File

@@ -70,7 +70,7 @@ class RagPyTorchDistributedRetriever(RagRetriever):
logger.info("dist not initialized / main")
self.index.init_index()
# all processes wait untill the retriever is initialized by the main process
# all processes wait until the retriever is initialized by the main process
if dist.is_initialized():
torch.distributed.barrier(group=self.process_group)

View File

@@ -458,7 +458,7 @@ class GenerativeQAModule(BaseTransformer):
default=None,
help=(
"Name of the index to use: 'hf' for a canonical dataset from the datasets library (default), 'custom'"
" for a local index, or 'legacy' for the orignal one)"
" for a local index, or 'legacy' for the original one)"
),
)
parser.add_argument(

View File

@@ -266,7 +266,7 @@ class DataCollatorCTCWithPadding:
Data collator that will dynamically pad the inputs received.
Args:
processor (:class:`~transformers.AutoProcessor`)
The processor used for proccessing the data.
The processor used for processing the data.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:

View File

@@ -257,7 +257,7 @@ class DataCollatorCTCWithPadding:
Data collator that will dynamically pad the inputs received.
Args:
processor (:class:`~transformers.AutoProcessor`)
The processor used for proccessing the data.
The processor used for processing the data.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:

View File

@@ -226,7 +226,7 @@ class DataCollatorCTCWithPadding:
Data collator that will dynamically pad the inputs received.
Args:
processor (:class:`~transformers.Wav2Vec2Processor`)
The processor used for proccessing the data.
The processor used for processing the data.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:

View File

@@ -145,7 +145,7 @@ class DataCollatorCTCWithPadding:
Data collator that will dynamically pad the inputs received.
Args:
processor (:class:`~transformers.Wav2Vec2Processor`)
The processor used for proccessing the data.
The processor used for processing the data.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:

View File

@@ -142,7 +142,7 @@ class DataCollatorForWav2Vec2Pretraining:
The Wav2Vec2 model used for pretraining. The data collator needs to have access
to config and ``_get_feat_extract_output_lengths`` function for correct padding.
feature_extractor (:class:`~transformers.Wav2Vec2FeatureExtractor`):
The processor used for proccessing the data.
The processor used for processing the data.
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among: