chore: Fix typos in docs and examples (#36524)
Fix typos in docs and examples Signed-off-by: co63oc <co63oc@users.noreply.github.com>
This commit is contained in:
@@ -265,7 +265,7 @@ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor ([`Wav2Vec2Processor`])
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
decoder_start_token_id (:obj: `int`)
|
||||
The begin-of-sentence of the decoder.
|
||||
input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
|
||||
@@ -296,7 +296,7 @@ class DataCollatorForWav2Vec2Pretraining:
|
||||
The Wav2Vec2 model used for pretraining. The data collator needs to have access
|
||||
to config and ``_get_feat_extract_output_lengths`` function for correct padding.
|
||||
feature_extractor (:class:`~transformers.Wav2Vec2FeatureExtractor`):
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
@@ -445,7 +445,7 @@ def main():
|
||||
accelerator.wait_for_everyone()
|
||||
|
||||
# 1. Download and create train, validation dataset
|
||||
# We load all dataset configuration and datset split pairs passed in
|
||||
# We load all dataset configuration and dataset split pairs passed in
|
||||
# ``args.dataset_config_names`` and ``args.dataset_split_names``
|
||||
datasets_splits = []
|
||||
for dataset_config_name, train_split_name in zip(args.dataset_config_names, args.dataset_split_names):
|
||||
|
||||
@@ -292,7 +292,7 @@ class DataCollatorCTCWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor (:class:`~transformers.AutoProcessor`)
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
|
||||
@@ -275,7 +275,7 @@ class DataCollatorCTCWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor (:class:`~transformers.AutoProcessor`)
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
@@ -559,7 +559,7 @@ def main():
|
||||
)
|
||||
|
||||
# if we are doing adapter language training, save
|
||||
# vocab with adpter language
|
||||
# vocab with adapter language
|
||||
if data_args.target_language is not None:
|
||||
vocab_dict[data_args.target_language] = lang_dict
|
||||
|
||||
|
||||
@@ -429,7 +429,7 @@ def main():
|
||||
if is_regression:
|
||||
label_list = None
|
||||
num_labels = 1
|
||||
# regession requires float as label type, let's cast it if needed
|
||||
# regression requires float as label type, let's cast it if needed
|
||||
for split in raw_datasets.keys():
|
||||
if raw_datasets[split].features["label"].dtype not in ["float32", "float64"]:
|
||||
logger.warning(
|
||||
|
||||
@@ -19,7 +19,7 @@ limitations under the License.
|
||||
Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-generation/run_generation.py).
|
||||
|
||||
Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, GPT-J, Transformer-XL, XLNet, CTRL, BLOOM, LLAMA, OPT.
|
||||
A similar script is used for our official demo [Write With Transfomer](https://transformer.huggingface.co), where you
|
||||
A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you
|
||||
can try out the different models available in the library.
|
||||
|
||||
Example usage:
|
||||
|
||||
@@ -19,7 +19,7 @@ limitations under the License.
|
||||
## PyTorch version
|
||||
|
||||
Fine-tuning the library models for token classification task such as Named Entity Recognition (NER), Parts-of-speech
|
||||
tagging (POS) or phrase extraction (CHUNKS). The main scrip `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
|
||||
tagging (POS) or phrase extraction (CHUNKS). The main script `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
|
||||
customize it to your needs if you need extra processing on your datasets.
|
||||
|
||||
It will either run on a dataset hosted on our [hub](https://huggingface.co/datasets) or with your own text files for
|
||||
|
||||
@@ -37,7 +37,7 @@ class BertAbsConfig(PretrainedConfig):
|
||||
max_pos: int
|
||||
The maximum sequence length that this model will be used with.
|
||||
enc_layer: int
|
||||
The numner of hidden layers in the Transformer encoder.
|
||||
The number of hidden layers in the Transformer encoder.
|
||||
enc_hidden_size: int
|
||||
The size of the encoder's layers.
|
||||
enc_heads: int
|
||||
@@ -49,7 +49,7 @@ class BertAbsConfig(PretrainedConfig):
|
||||
embeddings, layers, pooler and also the attention probabilities in
|
||||
the encoder.
|
||||
dec_layer: int
|
||||
The numner of hidden layers in the decoder.
|
||||
The number of hidden layers in the decoder.
|
||||
dec_hidden_size: int
|
||||
The size of the decoder's layers.
|
||||
dec_heads: int
|
||||
|
||||
@@ -130,7 +130,7 @@ def convert_bertabs_checkpoints(path_to_checkpoints, dump_path):
|
||||
mask_tgt = decoder_attention_mask = None
|
||||
mask_cls = None
|
||||
|
||||
# The original model does not apply the geneator layer immediatly but rather in
|
||||
# The original model does not apply the generator layer immediately but rather in
|
||||
# the beam search (where it combines softmax + linear layer). Since we already
|
||||
# apply the softmax in our generation process we only apply the linear layer here.
|
||||
# We make sure that the outputs of the full stack are identical
|
||||
@@ -143,9 +143,9 @@ def convert_bertabs_checkpoints(path_to_checkpoints, dump_path):
|
||||
output_converted_generator = new_model.generator(output_converted_model)
|
||||
|
||||
maximum_absolute_difference = torch.max(torch.abs(output_converted_model - output_original_model)).item()
|
||||
print("Maximum absolute difference beween weights: {:.2f}".format(maximum_absolute_difference))
|
||||
print("Maximum absolute difference between weights: {:.2f}".format(maximum_absolute_difference))
|
||||
maximum_absolute_difference = torch.max(torch.abs(output_converted_generator - output_original_generator)).item()
|
||||
print("Maximum absolute difference beween weights: {:.2f}".format(maximum_absolute_difference))
|
||||
print("Maximum absolute difference between weights: {:.2f}".format(maximum_absolute_difference))
|
||||
|
||||
are_identical = torch.allclose(output_converted_model, output_original_model, atol=1e-3)
|
||||
if are_identical:
|
||||
|
||||
@@ -390,7 +390,7 @@ class MultiHeadedAttention(nn.Module):
|
||||
:cite:`DBLP:journals/corr/VaswaniSPUJGKP17`.
|
||||
|
||||
Similar to standard `dot` attention but uses
|
||||
multiple attention distributions simulataneously
|
||||
multiple attention distributions simultaneously
|
||||
to select relevant items.
|
||||
|
||||
.. mermaid::
|
||||
|
||||
@@ -260,7 +260,7 @@ def main():
|
||||
default=None,
|
||||
type=str,
|
||||
required=False,
|
||||
help="The folder in wich the summaries should be written. Defaults to the folder where the documents are",
|
||||
help="The folder in which the summaries should be written. Defaults to the folder where the documents are",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--compute_rouge",
|
||||
@@ -315,7 +315,7 @@ def main():
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# Select device (distibuted not available)
|
||||
# Select device (distributed not available)
|
||||
args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
|
||||
|
||||
# Check the existence of directories
|
||||
|
||||
@@ -24,7 +24,7 @@ class ConstantLengthDataset(IterableDataset):
|
||||
"""
|
||||
Iterable dataset that returns constant length chunks of tokens from stream of text files.
|
||||
Args:
|
||||
tokenizer (Tokenizer): The processor used for proccessing the data.
|
||||
tokenizer (Tokenizer): The processor used for processing the data.
|
||||
dataset (dataset.Dataset): Dataset with text files.
|
||||
infinite (bool): If True the iterator is reset after dataset reaches end else stops.
|
||||
seq_length (int): Length of token sequences to return.
|
||||
|
||||
@@ -84,7 +84,7 @@ def is_config_or_test(example, scan_width=5, coeff=0.05):
|
||||
|
||||
|
||||
def has_no_keywords(example):
|
||||
"""Check if a python file has none of the keywords for: funcion, class, for loop, while loop."""
|
||||
"""Check if a python file has none of the keywords for: function, class, for loop, while loop."""
|
||||
keywords = ["def ", "class ", "for ", "while "]
|
||||
lines = example["content"].splitlines()
|
||||
for line in lines:
|
||||
|
||||
@@ -252,7 +252,7 @@ def make_fast_generalized_attention(
|
||||
unidirectional=False,
|
||||
lax_scan_unroll=1,
|
||||
):
|
||||
"""Construct a fast generalized attention menthod."""
|
||||
"""Construct a fast generalized attention method."""
|
||||
logging.info("Fast generalized attention.: %s features and renormalize=%s", nb_features, renormalize_attention)
|
||||
if features_type == "ortho":
|
||||
matrix_creator = functools.partial(GaussianOrthogonalRandomMatrix, nb_features, qkv_dim, scaling=False)
|
||||
|
||||
@@ -11,7 +11,7 @@ Please read the [accompanying blog post](https://shamanesiri.medium.com/how-to-f
|
||||
The original RAG code has also been modified to work with the latest versions of pytorch lightning (version 1.2.10) and RAY (version 1.3.0). All other implementation details remain the same as the [original RAG code](https://github.com/huggingface/transformers/tree/main/examples/research_projects/rag).
|
||||
Read more about RAG at https://arxiv.org/abs/2005.11401.
|
||||
|
||||
This code can be modified to experiment with other research on retrival augmented models which include training of the retriever (e.g. [REALM](https://arxiv.org/abs/2002.08909) and [MARGE](https://arxiv.org/abs/2006.15020)).
|
||||
This code can be modified to experiment with other research on retrieval augmented models which include training of the retriever (e.g. [REALM](https://arxiv.org/abs/2002.08909) and [MARGE](https://arxiv.org/abs/2006.15020)).
|
||||
|
||||
To start training, use the bash script (finetune_rag_ray_end2end.sh) in this folder. This script also includes descriptions on each command-line argument used.
|
||||
|
||||
|
||||
@@ -134,7 +134,7 @@ class BaseTransformer(pl.LightningModule):
|
||||
{
|
||||
"params": [
|
||||
p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)
|
||||
], # check this named paramters
|
||||
], # check this named parameters
|
||||
"weight_decay": self.hparams.weight_decay,
|
||||
},
|
||||
{
|
||||
@@ -279,7 +279,7 @@ class InitCallback(pl.Callback):
|
||||
|
||||
|
||||
class CheckParamCallback(pl.Callback):
|
||||
# check whether new added model paramters are differentiable
|
||||
# check whether newly added model parameters are differentiable
|
||||
def on_after_backward(self, trainer, pl_module):
|
||||
# print(pl_module.model.rag)
|
||||
for name, param in pl_module.model.rag.named_parameters():
|
||||
|
||||
@@ -98,7 +98,7 @@ Our evaluation script enables two modes of evaluation (controlled by the `eval_m
|
||||
|
||||
The evaluation script expects paths to two files:
|
||||
- `evaluation_set` - a path to a file specifying the evaluation dataset, a single input per line.
|
||||
- `gold_data_path` - a path to a file contaning ground truth answers for datapoints from the `evaluation_set`, a single output per line. Check below for expected formats of the gold data files.
|
||||
- `gold_data_path` - a path to a file containing ground truth answers for datapoints from the `evaluation_set`, a single output per line. Check below for expected formats of the gold data files.
|
||||
|
||||
|
||||
## Retrieval evaluation
|
||||
|
||||
@@ -70,7 +70,7 @@ class RagPyTorchDistributedRetriever(RagRetriever):
|
||||
logger.info("dist not initialized / main")
|
||||
self.index.init_index()
|
||||
|
||||
# all processes wait untill the retriever is initialized by the main process
|
||||
# all processes wait until the retriever is initialized by the main process
|
||||
if dist.is_initialized():
|
||||
torch.distributed.barrier(group=self.process_group)
|
||||
|
||||
|
||||
@@ -458,7 +458,7 @@ class GenerativeQAModule(BaseTransformer):
|
||||
default=None,
|
||||
help=(
|
||||
"Name of the index to use: 'hf' for a canonical dataset from the datasets library (default), 'custom'"
|
||||
" for a local index, or 'legacy' for the orignal one)"
|
||||
" for a local index, or 'legacy' for the original one)"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
|
||||
@@ -266,7 +266,7 @@ class DataCollatorCTCWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor (:class:`~transformers.AutoProcessor`)
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
|
||||
@@ -257,7 +257,7 @@ class DataCollatorCTCWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor (:class:`~transformers.AutoProcessor`)
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
|
||||
@@ -226,7 +226,7 @@ class DataCollatorCTCWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor (:class:`~transformers.Wav2Vec2Processor`)
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
|
||||
@@ -145,7 +145,7 @@ class DataCollatorCTCWithPadding:
|
||||
Data collator that will dynamically pad the inputs received.
|
||||
Args:
|
||||
processor (:class:`~transformers.Wav2Vec2Processor`)
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
|
||||
@@ -142,7 +142,7 @@ class DataCollatorForWav2Vec2Pretraining:
|
||||
The Wav2Vec2 model used for pretraining. The data collator needs to have access
|
||||
to config and ``_get_feat_extract_output_lengths`` function for correct padding.
|
||||
feature_extractor (:class:`~transformers.Wav2Vec2FeatureExtractor`):
|
||||
The processor used for proccessing the data.
|
||||
The processor used for processing the data.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
|
||||
Reference in New Issue
Block a user