Big file_utils cleanup (#16396)
* Big file_utils cleanup * This one still needs to be treated separately
This commit is contained in:
@@ -646,9 +646,9 @@ class BatchEncoding(UserDict):
|
||||
Convert the inner content to tensors.
|
||||
|
||||
Args:
|
||||
tensor_type (`str` or [`~file_utils.TensorType`], *optional*):
|
||||
The type of tensors to use. If `str`, should be one of the values of the enum
|
||||
[`~file_utils.TensorType`]. If `None`, no modification is done.
|
||||
tensor_type (`str` or [`~utils.TensorType`], *optional*):
|
||||
The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
|
||||
`None`, no modification is done.
|
||||
prepend_batch_axis (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to add the batch dimension during the conversion.
|
||||
"""
|
||||
@@ -1253,7 +1253,7 @@ class SpecialTokensMixin:
|
||||
ENCODE_KWARGS_DOCSTRING = r"""
|
||||
add_special_tokens (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to encode the sequences with the special tokens relative to their model.
|
||||
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`):
|
||||
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
|
||||
Activates and controls padding. Accepts the following values:
|
||||
|
||||
- `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
|
||||
@@ -1295,7 +1295,7 @@ ENCODE_KWARGS_DOCSTRING = r"""
|
||||
pad_to_multiple_of (`int`, *optional*):
|
||||
If set will pad the sequence to a multiple of the provided value. This is especially useful to enable
|
||||
the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.0 (Volta).
|
||||
return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
|
||||
return_tensors (`str` or [`~utils.TensorType`], *optional*):
|
||||
If set, will return tensors instead of list of python integers. Acceptable values are:
|
||||
|
||||
- `'tf'`: Return TensorFlow `tf.constant` objects.
|
||||
@@ -2731,7 +2731,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
|
||||
Instead of `List[int]` you can have tensors (numpy arrays, PyTorch tensors or TensorFlow tensors), see
|
||||
the note above for the return type.
|
||||
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
|
||||
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding
|
||||
index) among:
|
||||
|
||||
@@ -2753,7 +2753,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
to the specific tokenizer's default, defined by the `return_outputs` attribute.
|
||||
|
||||
[What are attention masks?](../glossary#attention-mask)
|
||||
return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
|
||||
return_tensors (`str` or [`~utils.TensorType`], *optional*):
|
||||
If set, will return tensors instead of list of python integers. Acceptable values are:
|
||||
|
||||
- `'tf'`: Return TensorFlow `tf.constant` objects.
|
||||
@@ -3453,7 +3453,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
max_target_length (`int`, *optional*):
|
||||
Controls the maximum length of decoder inputs (target language texts or summaries). If left unset or set
|
||||
to `None`, this will use the max_length value.
|
||||
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`):
|
||||
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
|
||||
Activates and controls padding. Accepts the following values:
|
||||
|
||||
- `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
|
||||
@@ -3462,7 +3462,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
acceptable input length for the model if that argument is not provided.
|
||||
- `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
|
||||
lengths).
|
||||
return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
|
||||
return_tensors (`str` or [`~utils.TensorType`], *optional*):
|
||||
If set, will return tensors instead of list of python integers. Acceptable values are:
|
||||
|
||||
- `'tf'`: Return TensorFlow `tf.constant` objects.
|
||||
|
||||
Reference in New Issue
Block a user