Fix doc errors and typos across the board (#8139)

* Fix doc errors and typos across the board

* Fix a typo

* Fix the CI

* Fix more typos

* Fix CI

* More fixes

* Fix CI

* More fixes

* More fixes
This commit is contained in:
Santiago Castro
2020-10-29 10:33:33 -04:00
committed by GitHub
parent 4731a00c3e
commit 969859d5f6
160 changed files with 342 additions and 364 deletions

View File

@@ -330,7 +330,7 @@ class DataCollatorForSOP(DataCollatorForLanguageModeling):
input_ids, labels, attention_mask = self.mask_tokens(input_ids)
token_type_ids = [example["token_type_ids"] for example in examples]
# size of segment_ids varied because randomness, padding zero to the end as the orignal implementation
# size of segment_ids varied because randomness, padding zero to the end as the original implementation
token_type_ids = pad_sequence(token_type_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id)
sop_label_list = [example["sentence_order_label"] for example in examples]

View File

@@ -71,7 +71,7 @@ class TextDataset(Dataset):
tokenizer.build_inputs_with_special_tokens(tokenized_text[i : i + block_size])
)
# Note that we are losing the last truncated example here for the sake of simplicity (no padding)
# If your dataset is small, first you should loook for a bigger one :-) and second you
# If your dataset is small, first you should look for a bigger one :-) and second you
# can change this behavior by adding (model specific) padding.
start = time.time()

View File

@@ -327,7 +327,7 @@ def squad_convert_examples_to_features(
padding_strategy: Default to "max_length". Which padding strategy to use
return_dataset: Default False. Either 'pt' or 'tf'.
if 'pt': returns a torch.data.TensorDataset, if 'tf': returns a tf.data.Dataset
threads: multiple processing threadsa-smi
threads: multiple processing threads.
Returns:
@@ -527,7 +527,7 @@ def squad_convert_examples_to_features(
class SquadProcessor(DataProcessor):
"""
Processor for the SQuAD data set. Overriden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
Processor for the SQuAD data set. overridden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
version 2.0 of SQuAD, respectively.
"""

View File

@@ -245,9 +245,6 @@ class SingleSentenceClassificationProcessor(DataProcessor):
Args:
tokenizer: Instance of a tokenizer that will tokenize the examples
max_length: Maximum example length
task: GLUE task
label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
output_mode: String indicating the output mode. Either ``regression`` or ``classification``
pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
pad_token: Padding token
mask_padding_with_zero: If set to ``True``, the attention mask will be filled by ``1`` for actual values