Changed processor documentation architecture. Added documentation for GLUE

This commit is contained in:
LysandreJik
2019-09-25 08:30:07 -04:00
committed by Lysandre Debut
parent c4ac7a76db
commit ad4a393e2e
2 changed files with 55 additions and 30 deletions

View File

@@ -26,6 +26,7 @@ if is_tf_available():
logger = logging.getLogger(__name__)
def glue_convert_examples_to_features(examples, tokenizer,
max_length=512,
task=None,
@@ -36,7 +37,27 @@ def glue_convert_examples_to_features(examples, tokenizer,
pad_token_segment_id=0,
mask_padding_with_zero=True):
"""
Loads a data file into a list of `InputBatch`s
Loads a data file into a list of ``InputFeatures``
Args:
examples: List of ``InputExamples`` or ``tf.data.Dataset`` containing the examples.
tokenizer: Instance of a tokenizer that will tokenize the examples
max_length: Maximum example length
task: GLUE task
label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
output_mode: String indicating the output mode. Either ``regression`` or ``classification``
pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
pad_token: Padding token
pad_token_segment_id: The segment ID for the padding token (usually 0, but it can vary; for XLNet, for example, it is 4)
mask_padding_with_zero: If set to ``True``, the attention mask will be filled with ``1`` for actual values
and with ``0`` for padded values. If set to ``False``, the mask is inverted (``1`` for padded values, ``0`` for
actual values)
Returns:
If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset``
containing the task-specific features. If the input is a list of ``InputExamples``, will return
a list of task-specific ``InputFeatures`` which can be fed to the model.
"""
is_tf_dataset = False
if is_tf_available() and isinstance(examples, tf.data.Dataset):