Fix doc errors and typos across the board (#8139)

* Fix doc errors and typos across the board * Fix a typo * Fix the CI * Fix more typos * Fix CI * More fixes * Fix CI * More fixes * More fixes
2020-10-29 10:33:33 -04:00
parent 4731a00c3e
commit 969859d5f6
160 changed files with 342 additions and 364 deletions
--- a/src/transformers/data/data_collator.py
+++ b/src/transformers/data/data_collator.py
@@ -330,7 +330,7 @@ class DataCollatorForSOP(DataCollatorForLanguageModeling):
        input_ids, labels, attention_mask = self.mask_tokens(input_ids)

        token_type_ids = [example["token_type_ids"] for example in examples]
-        # size of segment_ids varied because randomness, padding zero to the end as the orignal implementation
+        # size of segment_ids varied because randomness, padding zero to the end as the original implementation
        token_type_ids = pad_sequence(token_type_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id)

        sop_label_list = [example["sentence_order_label"] for example in examples]
--- a/src/transformers/data/datasets/language_modeling.py
+++ b/src/transformers/data/datasets/language_modeling.py
@@ -71,7 +71,7 @@ class TextDataset(Dataset):
                        tokenizer.build_inputs_with_special_tokens(tokenized_text[i : i + block_size])
                    )
                # Note that we are losing the last truncated example here for the sake of simplicity (no padding)
-                # If your dataset is small, first you should loook for a bigger one :-) and second you
+                # If your dataset is small, first you should look for a bigger one :-) and second you
                # can change this behavior by adding (model specific) padding.

                start = time.time()
--- a/src/transformers/data/processors/squad.py
+++ b/src/transformers/data/processors/squad.py
@@ -327,7 +327,7 @@ def squad_convert_examples_to_features(
        padding_strategy: Default to "max_length". Which padding strategy to use
        return_dataset: Default False. Either 'pt' or 'tf'.
            if 'pt': returns a torch.data.TensorDataset, if 'tf': returns a tf.data.Dataset
-        threads: multiple processing threadsa-smi
+        threads: multiple processing threads.


    Returns:
@@ -527,7 +527,7 @@ def squad_convert_examples_to_features(

 class SquadProcessor(DataProcessor):
    """
-    Processor for the SQuAD data set. Overriden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
+    Processor for the SQuAD data set. overridden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
    version 2.0 of SQuAD, respectively.
    """

--- a/src/transformers/data/processors/utils.py
+++ b/src/transformers/data/processors/utils.py
@@ -245,9 +245,6 @@ class SingleSentenceClassificationProcessor(DataProcessor):
        Args:
            tokenizer: Instance of a tokenizer that will tokenize the examples
            max_length: Maximum example length
-            task: GLUE task
-            label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
-            output_mode: String indicating the output mode. Either ``regression`` or ``classification``
            pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
            pad_token: Padding token
            mask_padding_with_zero: If set to ``True``, the attention mask will be filled by ``1`` for actual values