Fix doc errors and typos across the board (#8139)
* Fix doc errors and typos across the board * Fix a typo * Fix the CI * Fix more typos * Fix CI * More fixes * Fix CI * More fixes * More fixes
This commit is contained in:
@@ -330,7 +330,7 @@ class DataCollatorForSOP(DataCollatorForLanguageModeling):
|
||||
input_ids, labels, attention_mask = self.mask_tokens(input_ids)
|
||||
|
||||
token_type_ids = [example["token_type_ids"] for example in examples]
|
||||
# size of segment_ids varied because randomness, padding zero to the end as the orignal implementation
|
||||
# size of segment_ids varies because of randomness; pad with zeros at the end, as in the original implementation
|
||||
token_type_ids = pad_sequence(token_type_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id)
|
||||
|
||||
sop_label_list = [example["sentence_order_label"] for example in examples]
|
||||
|
||||
@@ -71,7 +71,7 @@ class TextDataset(Dataset):
|
||||
tokenizer.build_inputs_with_special_tokens(tokenized_text[i : i + block_size])
|
||||
)
|
||||
# Note that we are losing the last truncated example here for the sake of simplicity (no padding)
|
||||
# If your dataset is small, first you should loook for a bigger one :-) and second you
|
||||
# If your dataset is small, first you should look for a bigger one :-) and second you
|
||||
# can change this behavior by adding (model specific) padding.
|
||||
|
||||
start = time.time()
|
||||
|
||||
@@ -327,7 +327,7 @@ def squad_convert_examples_to_features(
|
||||
padding_strategy: Default to "max_length". Which padding strategy to use
|
||||
return_dataset: Default False. Either 'pt' or 'tf'.
|
||||
if 'pt': returns a torch.data.TensorDataset, if 'tf': returns a tf.data.Dataset
|
||||
threads: multiple processing threadsa-smi
|
||||
threads: multiple processing threads.
|
||||
|
||||
|
||||
Returns:
|
||||
@@ -527,7 +527,7 @@ def squad_convert_examples_to_features(
|
||||
|
||||
class SquadProcessor(DataProcessor):
|
||||
"""
|
||||
Processor for the SQuAD data set. Overriden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
|
||||
Processor for the SQuAD data set. Overridden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
|
||||
version 2.0 of SQuAD, respectively.
|
||||
"""
|
||||
|
||||
|
||||
@@ -245,9 +245,6 @@ class SingleSentenceClassificationProcessor(DataProcessor):
|
||||
Args:
|
||||
tokenizer: Instance of a tokenizer that will tokenize the examples
|
||||
max_length: Maximum example length
|
||||
task: GLUE task
|
||||
label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
|
||||
output_mode: String indicating the output mode. Either ``regression`` or ``classification``
|
||||
pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
|
||||
pad_token: Padding token
|
||||
mask_padding_with_zero: If set to ``True``, the attention mask will be filled by ``1`` for actual values
|
||||
|
||||
Reference in New Issue
Block a user