diff --git a/docs/source/main_classes/trainer.rst b/docs/source/main_classes/trainer.rst index a6314e3253..850af5eb99 100644 --- a/docs/source/main_classes/trainer.rst +++ b/docs/source/main_classes/trainer.rst @@ -197,7 +197,7 @@ which should make the "stop and resume" style of training as close as possible t However, due to various default non-deterministic pytorch settings this might not fully work. If you want full determinism please refer to `Controlling sources of randomness `__. As explained in the document, that some of those settings -that make things determinstic (.e.g., ``torch.backends.cudnn.deterministic``) may slow things down, therefore this +that make things deterministic (.e.g., ``torch.backends.cudnn.deterministic``) may slow things down, therefore this can't be done by default, but you can enable those yourself if needed. diff --git a/docs/source/model_doc/deberta_v2.rst b/docs/source/model_doc/deberta_v2.rst index 9075129a7e..0b6ea1ddb8 100644 --- a/docs/source/model_doc/deberta_v2.rst +++ b/docs/source/model_doc/deberta_v2.rst @@ -53,7 +53,7 @@ New in v2: transformer layer to better learn the local dependency of input tokens. - **Sharing position projection matrix with content projection matrix in attention layer** Based on previous experiments, this can save parameters without affecting the performance. -- **Apply bucket to encode relative postions** The DeBERTa-v2 model uses log bucket to encode relative positions +- **Apply bucket to encode relative positions** The DeBERTa-v2 model uses log bucket to encode relative positions similar to T5. - **900M model & 1.5B model** Two additional model sizes are available: 900M and 1.5B, which significantly improves the performance of downstream tasks. diff --git a/docs/source/model_doc/speech_to_text.rst b/docs/source/model_doc/speech_to_text.rst index b8de71d66c..3b84fede85 100644 --- a/docs/source/model_doc/speech_to_text.rst +++ b/docs/source/model_doc/speech_to_text.rst @@ -42,8 +42,8 @@ features. 
The :class:`~transformers.Speech2TextProcessor` wraps :class:`~transfo predicted token ids. The feature extractor depends on :obj:`torchaudio` and the tokenizer depends on :obj:`sentencepiece` so be sure to -install those packages before running the examples. You could either install those as extra speech dependancies with -``pip install transformers"[speech, sentencepiece]"`` or install the packages seperatly with ``pip install torchaudio +install those packages before running the examples. You could either install those as extra speech dependencies with +``pip install transformers"[speech, sentencepiece]"`` or install the packages separately with ``pip install torchaudio sentencepiece``. Also ``torchaudio`` requires the development version of the `libsndfile `__ package which can be installed via a system package manager. On Ubuntu it can be installed as follows: ``apt install libsndfile1-dev`` diff --git a/docs/source/training.rst b/docs/source/training.rst index 82bcecac27..ecb2c70b34 100644 --- a/docs/source/training.rst +++ b/docs/source/training.rst @@ -281,7 +281,7 @@ Fine-tuning in native PyTorch frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen> -You might need to restart your notebook at this stage to free some memory, or excute the following code: +You might need to restart your notebook at this stage to free some memory, or execute the following code: .. code-block:: python diff --git a/src/transformers/deepspeed.py b/src/transformers/deepspeed.py index 7f47ff9085..63cac7a67f 100644 --- a/src/transformers/deepspeed.py +++ b/src/transformers/deepspeed.py @@ -62,7 +62,7 @@ class HfDeepSpeedConfig: if isinstance(config_file_or_dict, dict): # Don't modify user's data should they want to reuse it (e.g. 
in tests), because once we - # modified it, it will not be accepted here again, since `auto` values would have been overriden + # modified it, it will not be accepted here again, since `auto` values would have been overridden config = deepcopy(config_file_or_dict) elif isinstance(config_file_or_dict, str): with io.open(config_file_or_dict, "r", encoding="utf-8") as f: diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py index d9a0f6803d..7cb6f711ae 100644 --- a/src/transformers/modelcard.py +++ b/src/transformers/modelcard.py @@ -468,7 +468,7 @@ class TrainingSummary: model_card += f"This model is a fine-tuned version of [{self.finetuned_from}](https://huggingface.co/{self.finetuned_from}) on " if self.dataset is None: - model_card += "an unkown dataset." + model_card += "an unknown dataset." else: if isinstance(self.dataset, str): model_card += f"the {self.dataset} dataset." diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index 7507a0831e..eba53d47d0 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -177,14 +177,14 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): - A path or url to a `pt index checkpoint file` (e.g, ``./tf_model/model.ckpt.index``). In this case, ``from_pt`` should be set to :obj:`True`. model_args (sequence of positional arguments, `optional`): - All remaning positional arguments will be passed to the underlying model's ``__init__`` method. + All remaining positional arguments will be passed to the underlying model's ``__init__`` method. config (:obj:`Union[PretrainedConfig, str, os.PathLike]`, `optional`): Can be either: - an instance of a class derived from :class:`~transformers.PretrainedConfig`, - a string or path valid as input to :func:`~transformers.PretrainedConfig.from_pretrained`. - Configuration for the model to use instead of an automatically loaded configuation. 
Configuration can + Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when: - The model is a model provided by the library (loaded with the `model id` string of a pretrained diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index d3250e36e7..5c4e644acf 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -1120,14 +1120,14 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu - :obj:`None` if you are both providing the configuration and state dictionary (resp. with keyword arguments ``config`` and ``state_dict``). model_args (sequence of positional arguments, `optional`): - All remaning positional arguments will be passed to the underlying model's ``__init__`` method. + All remaining positional arguments will be passed to the underlying model's ``__init__`` method. config (:obj:`Union[PretrainedConfig, str]`, `optional`): Can be either: - an instance of a class derived from :class:`~transformers.PretrainedConfig`, - a string valid as input to :func:`~transformers.PretrainedConfig.from_pretrained`. - Configuration for the model to use instead of an automatically loaded configuation. Configuration can + Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when: - The model is a model provided by the library (loaded with the `model id` string of a pretrained diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index b52180125f..38fba2823d 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -1038,14 +1038,14 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - :obj:`None` if you are both providing the configuration and state dictionary (resp. with keyword arguments ``config`` and ``state_dict``). 
model_args (sequence of positional arguments, `optional`): - All remaning positional arguments will be passed to the underlying model's ``__init__`` method. + All remaining positional arguments will be passed to the underlying model's ``__init__`` method. config (:obj:`Union[PretrainedConfig, str, os.PathLike]`, `optional`): Can be either: - an instance of a class derived from :class:`~transformers.PretrainedConfig`, - a string or path valid as input to :func:`~transformers.PretrainedConfig.from_pretrained`. - Configuration for the model to use instead of an automatically loaded configuation. Configuration can + Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when: - The model is a model provided by the library (loaded with the `model id` string of a pretrained diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py index 51cc371f6c..df69e43e8f 100755 --- a/src/transformers/models/big_bird/modeling_big_bird.py +++ b/src/transformers/models/big_bird/modeling_big_bird.py @@ -1138,7 +1138,7 @@ class BigBirdBlockSparseAttention(nn.Module): from_block_size: int. size of block in from sequence. to_block_size: int. size of block in to sequence. num_heads: int. total number of heads. - plan_from_length: list. plan from length where num_random_blocks are choosen from. + plan_from_length: list. plan from length where num_random_blocks are chosen from. plan_num_rand_blocks: list. number of rand blocks within the plan. window_block_left: int. number of blocks of window to left of a block. window_block_right: int. number of blocks of window to right of a block. 
diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index 4fc668348c..1c052bce1a 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -952,7 +952,7 @@ class BigBirdPegasusBlockSparseAttention(nn.Module): from_block_size: int. size of block in from sequence. to_block_size: int. size of block in to sequence. num_heads: int. total number of heads. - plan_from_length: list. plan from length where num_random_blocks are choosen from. + plan_from_length: list. plan from length where num_random_blocks are chosen from. plan_num_rand_blocks: list. number of rand blocks within the plan. window_block_left: int. number of blocks of window to left of a block. window_block_right: int. number of blocks of window to right of a block. diff --git a/src/transformers/models/clip/tokenization_clip.py b/src/transformers/models/clip/tokenization_clip.py index 39eed99e3a..474fc24421 100644 --- a/src/transformers/models/clip/tokenization_clip.py +++ b/src/transformers/models/clip/tokenization_clip.py @@ -60,7 +60,7 @@ def bytes_to_unicode(): The reversible bpe codes work on unicode strings. This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. When you're at something like a 10B token dataset you end up needing around 5K for - decent coverage. This is a signficant percentage of your normal, say, 32K bpe vocab. To avoid that, we want lookup + decent coverage. This is a significant percentage of your normal, say, 32K bpe vocab. To avoid that, we want lookup tables between utf-8 bytes and unicode strings. 
""" bs = ( @@ -317,7 +317,7 @@ class CLIPTokenizer(PreTrainedTokenizer): for token in re.findall(self.pat, text): token = "".join( self.byte_encoder[b] for b in token.encode("utf-8") - ) # Maps all our bytes to unicode strings, avoiding controle tokens of the BPE (spaces in our case) + ) # Maps all our bytes to unicode strings, avoiding control tokens of the BPE (spaces in our case) bpe_tokens.extend(bpe_token for bpe_token in self.bpe(token).split(" ")) return bpe_tokens diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index a094c03485..3061addada 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -151,7 +151,7 @@ class DetrObjectDetectionOutput(ModelOutput): unnormalized bounding boxes. auxiliary_outputs (:obj:`list[Dict]`, `optional`): Optional, only returned when auxilary losses are activated (i.e. :obj:`config.auxiliary_loss` is set to - `True`) and labels are provided. It is a list of dictionnaries containing the two above keys (:obj:`logits` + `True`) and labels are provided. It is a list of dictionaries containing the two above keys (:obj:`logits` and :obj:`pred_boxes`) for each decoder layer. last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): Sequence of hidden-states at the output of the last layer of the decoder of the model. @@ -218,8 +218,8 @@ class DetrSegmentationOutput(ModelOutput): :meth:`~transformers.DetrFeatureExtractor.post_process_panoptic` to evaluate instance and panoptic segmentation masks respectively. auxiliary_outputs (:obj:`list[Dict]`, `optional`): - Optional, only returned when auxilary losses are activated (i.e. :obj:`config.auxiliary_loss` is set to - `True`) and labels are provided. It is a list of dictionnaries containing the two above keys (:obj:`logits` + Optional, only returned when auxiliary losses are activated (i.e. 
:obj:`config.auxiliary_loss` is set to + `True`) and labels are provided. It is a list of dictionaries containing the two above keys (:obj:`logits` and :obj:`pred_boxes`) for each decoder layer. last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): Sequence of hidden-states at the output of the last layer of the decoder of the model. diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py index b3bb1eb603..c240d1079e 100644 --- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py @@ -272,7 +272,7 @@ class EncoderDecoderModel(PreTrainedModel): a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. model_args (remaining positional arguments, `optional`): - All remaning positional arguments will be passed to the underlying model's ``__init__`` method. + All remaining positional arguments will be passed to the underlying model's ``__init__`` method. kwargs (remaining dictionary of keyword arguments, `optional`): Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., diff --git a/src/transformers/models/gpt_neo/configuration_gpt_neo.py b/src/transformers/models/gpt_neo/configuration_gpt_neo.py index 85b4f702d5..c009056cd6 100644 --- a/src/transformers/models/gpt_neo/configuration_gpt_neo.py +++ b/src/transformers/models/gpt_neo/configuration_gpt_neo.py @@ -205,7 +205,7 @@ def custom_unfold(input, dimension, size, step): def custom_get_block_length_and_num_blocks(seq_length, window_size): """ Custom implementation for GPTNeoAttentionMixin._get_block_length_and_num_blocks to enable the export to ONNX as - original implmentation uses Python variables and control flow. + original implementation uses Python variables and control flow. 
""" import torch diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py index 012cd774da..6bb5169536 100755 --- a/src/transformers/models/hubert/modeling_hubert.py +++ b/src/transformers/models/hubert/modeling_hubert.py @@ -237,7 +237,7 @@ class HubertSamePadLayer(nn.Module): # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2FeatureExtractor with Wav2Vec2->Hubert class HubertFeatureExtractor(nn.Module): - """Construct the featurs from raw audio waveform""" + """Construct the features from raw audio waveform""" def __init__(self, config): super().__init__() diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index 183a454373..9c64ed5995 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -283,7 +283,7 @@ class RagPreTrainedModel(PreTrainedModel): a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. model_args (remaining positional arguments, `optional`): - All remaning positional arguments will be passed to the underlying model's ``__init__`` method. + All remaining positional arguments will be passed to the underlying model's ``__init__`` method. retriever (:class:`~transformers.RagRetriever`, `optional`): The retriever to use. kwwargs (remaining dictionary of keyword arguments, `optional`): diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index afd5472f18..063e078cec 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -258,7 +258,7 @@ class TFRagPreTrainedModel(TFPreTrainedModel): ``generator_from_pt`` should be set to :obj:`True`. model_args (remaining positional arguments, `optional`): - All remaning positional arguments will be passed to the underlying model's ``__init__`` method. 
+ All remaining positional arguments will be passed to the underlying model's ``__init__`` method. retriever (:class:`~transformers.RagRetriever`, `optional`): The retriever to use. kwargs (remaining dictionary of keyword arguments, `optional`): diff --git a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py index 34281c0068..c3c92e956f 100644 --- a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py @@ -385,7 +385,7 @@ class FlaxConvLayersCollection(nn.Module): class FlaxWav2Vec2FeatureExtractor(nn.Module): - """Construct the featurs from raw audio waveform""" + """Construct the features from raw audio waveform""" config: Wav2Vec2Config dtype: jnp.dtype = jnp.float32 diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index 7db5fd7f1d..f603cbe5e6 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -308,7 +308,7 @@ class Wav2Vec2SamePadLayer(nn.Module): class Wav2Vec2FeatureExtractor(nn.Module): - """Construct the featurs from raw audio waveform""" + """Construct the features from raw audio waveform""" def __init__(self, config): super().__init__() diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py index c18a646962..77ede156b3 100644 --- a/src/transformers/onnx/convert.py +++ b/src/transformers/onnx/convert.py @@ -158,7 +158,7 @@ def validate_model_outputs( # We flatten potential collection of outputs (i.e. 
past_keys) to a flat structure for name, value in ref_outputs.items(): - # Overwriting the output name as "present" since it is the name used for the ONNX ouputs + # Overwriting the output name as "present" since it is the name used for the ONNX outputs # ("past_key_values" being taken for the ONNX inputs) if name == "past_key_values": name = "present" diff --git a/src/transformers/onnx/features.py b/src/transformers/onnx/features.py index 73f7df359c..c44fbba2d6 100644 --- a/src/transformers/onnx/features.py +++ b/src/transformers/onnx/features.py @@ -114,7 +114,7 @@ class FeaturesManager: Args: model: The model to export - feature: The name of the feature to check if it is avaiable + feature: The name of the feature to check if it is available Returns: (str) The type of the model (OnnxConfig) The OnnxConfig instance holding the model export properties diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index fda8892b82..963199bb7e 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1375,7 +1375,7 @@ INIT_TOKENIZER_DOCSTRING = r""" high-level keys being the ``__init__`` keyword name of each vocabulary file required by the model, the low-level being the :obj:`short-cut-names` of the pretrained models with, as associated values, the :obj:`url` to the associated pretrained vocabulary file. - - **max_model_input_sizes** (:obj:`Dict[str, Optinal[int]]`) -- A dictionary with, as keys, the + - **max_model_input_sizes** (:obj:`Dict[str, Optional[int]]`) -- A dictionary with, as keys, the :obj:`short-cut-names` of the pretrained models, and as associated values, the maximum length of the sequence inputs of this model, or :obj:`None` if the model has no maximum input size. 
- **pretrained_init_configuration** (:obj:`Dict[str, Dict[str, Any]]`) -- A dictionary with, as keys, the @@ -1785,7 +1785,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): config = AutoConfig.from_pretrained(pretrained_model_name_or_path) config_tokenizer_class = config.tokenizer_class except (OSError, ValueError, KeyError): - # skip if an error occured. + # skip if an error occurred. config = None if config_tokenizer_class is None: # Third attempt. If we have not yet found the original type of the tokenizer, diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index 4f39a4dd64..b37539bb4f 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -707,7 +707,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): special_token_full = getattr(self, f"_{token}") if isinstance(special_token_full, AddedToken): - # Create an added token with the same paramters except the content + # Create an added token with the same parameters except the content kwargs[token] = AddedToken( special_token, single_word=special_token_full.single_word,