@@ -738,7 +738,7 @@ leave any data in there.
|
||||
<Tip>
|
||||
|
||||
In order to run the equivalent of `rm -r` safely, only subdirs of the project repository checkout are allowed if
|
||||
an explicit obj:*tmp_dir* is used, so that by mistake no `/tmp` or similar important part of the filesystem will
|
||||
an explicit `tmp_dir` is used, so that by mistake no `/tmp` or similar important part of the filesystem will
|
||||
get nuked. i.e. please always pass paths that start with `./`.
|
||||
|
||||
</Tip>
|
||||
|
||||
@@ -1320,7 +1320,7 @@ class GenerationMixin:
|
||||
|
||||
Return:
|
||||
[`~generation_utils.GreedySearchDecoderOnlyOutput`], [`~generation_utils.GreedySearchEncoderDecoderOutput`]
|
||||
or obj:*torch.LongTensor*: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
or `torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
[`~generation_utils.GreedySearchDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||
`return_dict_in_generate=True` or a [`~generation_utils.GreedySearchEncoderDecoderOutput`] if
|
||||
`model.config.is_encoder_decoder=True`.
|
||||
@@ -1547,7 +1547,7 @@ class GenerationMixin:
|
||||
|
||||
Return:
|
||||
[`~generation_utils.SampleDecoderOnlyOutput`], [`~generation_utils.SampleEncoderDecoderOutput`] or
|
||||
obj:*torch.LongTensor*: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
[`~generation_utils.SampleDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||
`return_dict_in_generate=True` or a [`~generation_utils.SampleEncoderDecoderOutput`] if
|
||||
`model.config.is_encoder_decoder=True`.
|
||||
@@ -1785,7 +1785,7 @@ class GenerationMixin:
|
||||
|
||||
Return:
|
||||
[`~generation_utils.BeamSearchDecoderOnlyOutput`], [`~generation_utils.BeamSearchEncoderDecoderOutput`] or
|
||||
obj:*torch.LongTensor*: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
[`~generation_utils.BeamSearchDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||
`return_dict_in_generate=True` or a [`~generation_utils.BeamSearchEncoderDecoderOutput`] if
|
||||
`model.config.is_encoder_decoder=True`.
|
||||
@@ -2079,7 +2079,7 @@ class GenerationMixin:
|
||||
|
||||
Return:
|
||||
[`~generation_utils.BeamSampleDecoderOnlyOutput`], [`~generation_utils.BeamSampleEncoderDecoderOutput`] or
|
||||
obj:*torch.LongTensor*: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
[`~generation_utils.BeamSampleDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||
`return_dict_in_generate=True` or a [`~generation_utils.BeamSampleEncoderDecoderOutput`] if
|
||||
`model.config.is_encoder_decoder=True`.
|
||||
@@ -2375,7 +2375,7 @@ class GenerationMixin:
|
||||
|
||||
Return:
|
||||
[`~generation_utils.BeamSearchDecoderOnlyOutput`], [`~generation_utils.BeamSearchEncoderDecoderOutput`] or
|
||||
obj:*torch.LongTensor*: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||
[`~generation_utils.BeamSearchDecoderOnlyOutput`] if
|
||||
`model.config.is_encoder_decoder=False` and `return_dict_in_generate=True` or a
|
||||
[`~generation_utils.BeamSearchEncoderDecoderOutput`] if `model.config.is_encoder_decoder=True`.
|
||||
|
||||
@@ -1840,8 +1840,8 @@ class PoolerEndLogits(nn.Module):
|
||||
|
||||
<Tip>
|
||||
|
||||
One of `start_states` or `start_positions` should be not obj:`None`. If both are set, `start_positions`
|
||||
overrides `start_states`.
|
||||
One of `start_states` or `start_positions` should be not `None`. If both are set, `start_positions` overrides
|
||||
`start_states`.
|
||||
|
||||
</Tip>
|
||||
|
||||
@@ -1906,8 +1906,8 @@ class PoolerAnswerClass(nn.Module):
|
||||
|
||||
<Tip>
|
||||
|
||||
One of `start_states` or `start_positions` should be not obj:`None`. If both are set, `start_positions`
|
||||
overrides `start_states`.
|
||||
One of `start_states` or `start_positions` should be not `None`. If both are set, `start_positions` overrides
|
||||
`start_states`.
|
||||
|
||||
</Tip>
|
||||
|
||||
|
||||
@@ -293,7 +293,7 @@ class EncoderDecoderModel(PreTrainedModel):
|
||||
the model, you need to first set it back in training mode with `model.train()`.
|
||||
|
||||
Params:
|
||||
encoder_pretrained_model_name_or_path (:obj: *str*, *optional*):
|
||||
encoder_pretrained_model_name_or_path (`str`, *optional*):
|
||||
Information necessary to initiate the encoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
@@ -306,7 +306,7 @@ class EncoderDecoderModel(PreTrainedModel):
|
||||
`config` argument. This loading path is slower than converting the TensorFlow checkpoint in a
|
||||
PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
|
||||
|
||||
decoder_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
decoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the decoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -746,7 +746,7 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
|
||||
checkpoints.
|
||||
|
||||
Params:
|
||||
encoder_pretrained_model_name_or_path (:obj: *Union[str, os.PathLike]*, *optional*):
|
||||
encoder_pretrained_model_name_or_path (`Union[str, os.PathLike]`, *optional*):
|
||||
Information necessary to initiate the encoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
@@ -755,7 +755,7 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
|
||||
- A path to a *directory* containing model weights saved using
|
||||
[`~FlaxPreTrainedModel.save_pretrained`], e.g., `./my_model_directory/`.
|
||||
|
||||
decoder_pretrained_model_name_or_path (:obj: *Union[str, os.PathLike]*, *optional*, defaults to `None`):
|
||||
decoder_pretrained_model_name_or_path (`Union[str, os.PathLike]`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the decoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -308,7 +308,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel):
|
||||
|
||||
|
||||
Params:
|
||||
encoder_pretrained_model_name_or_path (:obj: *str*, *optional*):
|
||||
encoder_pretrained_model_name_or_path (`str`, *optional*):
|
||||
Information necessary to initiate the encoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
@@ -319,7 +319,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel):
|
||||
- A path or url to a *pytorch index checkpoint file* (e.g, `./pt_model/`). In this case,
|
||||
`encoder_from_pt` should be set to `True`.
|
||||
|
||||
decoder_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
decoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the decoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -713,7 +713,7 @@ def batch_frexp(inputs, max_bit=31):
|
||||
Target scaling factor to decompose.
|
||||
|
||||
Returns:
|
||||
:obj:``Tuple(torch.Tensor, torch.Tensor)`: mantisa and exponent
|
||||
`Tuple(torch.Tensor, torch.Tensor)`: mantissa and exponent
|
||||
"""
|
||||
|
||||
shape_of_input = inputs.size()
|
||||
|
||||
@@ -108,7 +108,7 @@ class LayoutLMEmbeddings(nn.Module):
|
||||
right_position_embeddings = self.x_position_embeddings(bbox[:, :, 2])
|
||||
lower_position_embeddings = self.y_position_embeddings(bbox[:, :, 3])
|
||||
except IndexError as e:
|
||||
raise IndexError("The :obj:`bbox`coordinate values should be within 0-1000 range.") from e
|
||||
raise IndexError("The `bbox`coordinate values should be within 0-1000 range.") from e
|
||||
|
||||
h_position_embeddings = self.h_position_embeddings(bbox[:, :, 3] - bbox[:, :, 1])
|
||||
w_position_embeddings = self.w_position_embeddings(bbox[:, :, 2] - bbox[:, :, 0])
|
||||
|
||||
@@ -162,7 +162,7 @@ class TFLayoutLMEmbeddings(tf.keras.layers.Layer):
|
||||
right_position_embeddings = tf.gather(self.x_position_embeddings, bbox[:, :, 2])
|
||||
lower_position_embeddings = tf.gather(self.y_position_embeddings, bbox[:, :, 3])
|
||||
except IndexError as e:
|
||||
raise IndexError("The :obj:`bbox`coordinate values should be within 0-1000 range.") from e
|
||||
raise IndexError("The `bbox`coordinate values should be within 0-1000 range.") from e
|
||||
h_position_embeddings = tf.gather(self.h_position_embeddings, bbox[:, :, 3] - bbox[:, :, 1])
|
||||
w_position_embeddings = tf.gather(self.w_position_embeddings, bbox[:, :, 2] - bbox[:, :, 0])
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ class LayoutLMv2Embeddings(nn.Module):
|
||||
right_position_embeddings = self.x_position_embeddings(bbox[:, :, 2])
|
||||
lower_position_embeddings = self.y_position_embeddings(bbox[:, :, 3])
|
||||
except IndexError as e:
|
||||
raise IndexError("The :obj:`bbox` coordinate values should be within 0-1000 range.") from e
|
||||
raise IndexError("The `bbox` coordinate values should be within 0-1000 range.") from e
|
||||
|
||||
h_position_embeddings = self.h_position_embeddings(bbox[:, :, 3] - bbox[:, :, 1])
|
||||
w_position_embeddings = self.w_position_embeddings(bbox[:, :, 2] - bbox[:, :, 0])
|
||||
|
||||
@@ -1324,7 +1324,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
|
||||
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
|
||||
config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
|
||||
loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
|
||||
obj_labels: (`Dict[Str: Tuple[tf.Tensor, tf.Tensor]]`, *optional*, defaults to :obj: `None`):
|
||||
obj_labels: (`Dict[Str: Tuple[tf.Tensor, tf.Tensor]]`, *optional*, defaults to `None`):
|
||||
each key is named after each one of the visual losses and each element of the tuple is of the shape
|
||||
`(batch_size, num_features)` and `(batch_size, num_features, visual_feature_dim)` for each the label id and
|
||||
the label score respectively
|
||||
@@ -1334,7 +1334,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
|
||||
|
||||
- 0 indicates that the sentence does not match the image,
|
||||
- 1 indicates that the sentence does match the image.
|
||||
ans (`Torch.Tensor` of shape `(batch_size)`, *optional*, defaults to :obj: `None`):
|
||||
ans (`Torch.Tensor` of shape `(batch_size)`, *optional*, defaults to `None`):
|
||||
a one hot representation of the correct answer *optional*
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -258,7 +258,7 @@ class RagPreTrainedModel(PreTrainedModel):
|
||||
the model, you need to first set it back in training mode with `model.train()`.
|
||||
|
||||
Params:
|
||||
question_encoder_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
question_encoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the question encoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
@@ -271,7 +271,7 @@ class RagPreTrainedModel(PreTrainedModel):
|
||||
`config` argument. This loading path is slower than converting the TensorFlow checkpoint in a
|
||||
PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
|
||||
|
||||
generator_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
generator_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the generator. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -233,7 +233,7 @@ class TFRagPreTrainedModel(TFPreTrainedModel):
|
||||
model checkpoints.
|
||||
|
||||
Params:
|
||||
question_encoder_pretrained_model_name_or_path (:obj: *str*, *optional*):
|
||||
question_encoder_pretrained_model_name_or_path (`str`, *optional*):
|
||||
Information necessary to initiate the question encoder. Can be either:
|
||||
|
||||
- A string with the *shortcut name* of a pretrained model to load from cache or download, e.g.,
|
||||
@@ -245,7 +245,7 @@ class TFRagPreTrainedModel(TFPreTrainedModel):
|
||||
- A path or url to a *pytorch index checkpoint file* (e.g, `./pt_model/`). In this case,
|
||||
`question_encoder_from_pt` should be set to `True`.
|
||||
|
||||
generator_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
generator_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the generator. Can be either:
|
||||
|
||||
- A string with the *shortcut name* of a pretrained model to load from cache or download, e.g.,
|
||||
|
||||
@@ -287,7 +287,7 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
|
||||
the model, you need to first set it back in training mode with `model.train()`.
|
||||
|
||||
Params:
|
||||
encoder_pretrained_model_name_or_path (:obj: *str*, *optional*):
|
||||
encoder_pretrained_model_name_or_path (`str`, *optional*):
|
||||
Information necessary to initiate the encoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
@@ -300,7 +300,7 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
|
||||
`config` argument. This loading path is slower than converting the TensorFlow checkpoint in a
|
||||
PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
|
||||
|
||||
decoder_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
decoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the decoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -915,7 +915,7 @@ class T5Stack(T5PreTrainedModel):
|
||||
mask_seq_length = past_key_values[0][0].shape[2] + seq_length if past_key_values is not None else seq_length
|
||||
|
||||
if use_cache is True:
|
||||
assert self.is_decoder, f":obj:`use_cache` can only be set to `True` if {self} is used as a decoder"
|
||||
assert self.is_decoder, f"`use_cache` can only be set to `True` if {self} is used as a decoder"
|
||||
|
||||
if attention_mask is None:
|
||||
attention_mask = torch.ones(batch_size, mask_seq_length).to(inputs_embeds.device)
|
||||
|
||||
@@ -2277,7 +2277,7 @@ def _calculate_expected_result(
|
||||
Numeric values of every token. Nan for tokens which are not numeric values.
|
||||
numeric_values_scale (`torch.FloatTensor` of shape `(batch_size, seq_length)`):
|
||||
Scale of the numeric values of every token.
|
||||
input_mask_float (:obj: *torch.FloatTensor* of shape `(batch_size, seq_length)`):
|
||||
input_mask_float (`torch.FloatTensor` of shape `(batch_size, seq_length)`):
|
||||
Mask for the table, without question tokens and table headers.
|
||||
logits_aggregation (`torch.FloatTensor` of shape `(batch_size, num_aggregation_labels)`):
|
||||
Logits per aggregation operation.
|
||||
@@ -2371,9 +2371,9 @@ def _calculate_regression_loss(
|
||||
Calculates the regression loss per example.
|
||||
|
||||
Args:
|
||||
answer (:obj: *torch.FloatTensor* of shape `(batch_size,)`):
|
||||
answer (`torch.FloatTensor` of shape `(batch_size,)`):
|
||||
Answer for every example in the batch. Nan if there is no scalar answer.
|
||||
aggregate_mask (:obj: *torch.FloatTensor* of shape `(batch_size,)`):
|
||||
aggregate_mask (`torch.FloatTensor` of shape `(batch_size,)`):
|
||||
A mask set to 1 for examples that should use aggregation functions.
|
||||
dist_per_cell (`torch.distributions.Bernoulli`):
|
||||
Cell selection distribution for each cell.
|
||||
@@ -2381,9 +2381,9 @@ def _calculate_regression_loss(
|
||||
Numeric values of every token. Nan for tokens which are not numeric values.
|
||||
numeric_values_scale (`torch.FloatTensor` of shape `(batch_size, seq_length)`):
|
||||
Scale of the numeric values of every token.
|
||||
input_mask_float (:obj: *torch.FloatTensor* of shape `(batch_size, seq_length)`):
|
||||
input_mask_float (`torch.FloatTensor` of shape `(batch_size, seq_length)`):
|
||||
Mask for the table, without question tokens and table headers.
|
||||
logits_aggregation (:obj: *torch.FloatTensor* of shape `(batch_size, num_aggregation_labels)`):
|
||||
logits_aggregation (`torch.FloatTensor` of shape `(batch_size, num_aggregation_labels)`):
|
||||
Logits per aggregation operation.
|
||||
config ([`TapasConfig`]):
|
||||
Model configuration class with all the parameters of the model
|
||||
|
||||
@@ -2241,7 +2241,7 @@ def _calculate_expected_result(
|
||||
Numeric values of every token. Nan for tokens which are not numeric values.
|
||||
numeric_values_scale (`tf.Tensor` of shape `(batch_size, seq_length)`):
|
||||
Scale of the numeric values of every token.
|
||||
input_mask_float (:obj: *tf.Tensor* of shape `(batch_size, seq_length)`):
|
||||
input_mask_float (`tf.Tensor` of shape `(batch_size, seq_length)`):
|
||||
Mask for the table, without question tokens and table headers.
|
||||
logits_aggregation (`tf.Tensor` of shape `(batch_size, num_aggregation_labels)`):
|
||||
Logits per aggregation operation.
|
||||
@@ -2321,9 +2321,9 @@ def _calculate_regression_loss(
|
||||
Calculates the regression loss per example.
|
||||
|
||||
Args:
|
||||
answer (:obj: *tf.Tensor* of shape `(batch_size,)`):
|
||||
answer (`tf.Tensor` of shape `(batch_size,)`):
|
||||
Answer for every example in the batch. Nan if there is no scalar answer.
|
||||
aggregate_mask (:obj: *tf.Tensor* of shape `(batch_size,)`):
|
||||
aggregate_mask (`tf.Tensor` of shape `(batch_size,)`):
|
||||
A mask set to 1 for examples that should use aggregation functions.
|
||||
dist_per_cell (`torch.distributions.Bernoulli`):
|
||||
Cell selection distribution for each cell.
|
||||
@@ -2331,9 +2331,9 @@ def _calculate_regression_loss(
|
||||
Numeric values of every token. Nan for tokens which are not numeric values.
|
||||
numeric_values_scale (`tf.Tensor` of shape `(batch_size, seq_length)`):
|
||||
Scale of the numeric values of every token.
|
||||
input_mask_float (:obj: *tf.Tensor* of shape `(batch_size, seq_length)`):
|
||||
input_mask_float (`tf.Tensor` of shape `(batch_size, seq_length)`):
|
||||
Mask for the table, without question tokens and table headers.
|
||||
logits_aggregation (:obj: *tf.Tensor* of shape `(batch_size, num_aggregation_labels)`):
|
||||
logits_aggregation (`tf.Tensor` of shape `(batch_size, num_aggregation_labels)`):
|
||||
Logits per aggregation operation.
|
||||
config ([`TapasConfig`]):
|
||||
Model configuration class with all the parameters of the model
|
||||
|
||||
@@ -73,7 +73,7 @@ class UniSpeechConfig(PretrainedConfig):
|
||||
feat_extract_activation (`str`, *optional*, defaults to `"gelu"`):
|
||||
The non-linear activation function (function or string) in the 1D convolutional layers of the feature
|
||||
extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
|
||||
feat_quantizer_dropout (obj:*float*, *optional*, defaults to 0.0):
|
||||
feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
|
||||
The dropout probability for quantized feature extractor states.
|
||||
conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
|
||||
A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
|
||||
|
||||
@@ -73,7 +73,7 @@ class UniSpeechSatConfig(PretrainedConfig):
|
||||
feat_extract_activation (`str`, *optional*, defaults to `"gelu"`):
|
||||
The non-linear activation function (function or string) in the 1D convolutional layers of the feature
|
||||
extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
|
||||
feat_quantizer_dropout (obj:*float*, *optional*, defaults to 0.0):
|
||||
feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
|
||||
The dropout probability for quantized feature extractor states.
|
||||
conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
|
||||
A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
|
||||
|
||||
@@ -712,7 +712,7 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel):
|
||||
checkpoints.
|
||||
|
||||
Params:
|
||||
encoder_pretrained_model_name_or_path (:obj: *Union[str, os.PathLike]*, *optional*):
|
||||
encoder_pretrained_model_name_or_path (`Union[str, os.PathLike]`, *optional*):
|
||||
Information necessary to initiate the encoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co. An
|
||||
@@ -720,7 +720,7 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel):
|
||||
- A path to a *directory* containing model weights saved using
|
||||
[`~FlaxPreTrainedModel.save_pretrained`], e.g., `./my_model_directory/`.
|
||||
|
||||
decoder_pretrained_model_name_or_path (:obj: *Union[str, os.PathLike]*, *optional*, defaults to `None`):
|
||||
decoder_pretrained_model_name_or_path (`Union[str, os.PathLike]`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the decoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -260,7 +260,7 @@ class VisionEncoderDecoderModel(PreTrainedModel):
|
||||
the model, you need to first set it back in training mode with `model.train()`.
|
||||
|
||||
Params:
|
||||
encoder_pretrained_model_name_or_path (:obj: *str*, *optional*):
|
||||
encoder_pretrained_model_name_or_path (`str`, *optional*):
|
||||
Information necessary to initiate the image encoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co. An
|
||||
@@ -272,7 +272,7 @@ class VisionEncoderDecoderModel(PreTrainedModel):
|
||||
`config` argument. This loading path is slower than converting the TensorFlow checkpoint in a
|
||||
PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
|
||||
|
||||
decoder_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
decoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the text decoder. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -403,7 +403,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel):
|
||||
) -> FlaxPreTrainedModel:
|
||||
"""
|
||||
Params:
|
||||
vision_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
vision_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the vision model. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
@@ -416,7 +416,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel):
|
||||
loading path is slower than converting the PyTorch checkpoint in a Flax model using the provided
|
||||
conversion scripts and loading the Flax model afterwards.
|
||||
|
||||
text_model_name_or_path (:obj: *str*, *optional*):
|
||||
text_model_name_or_path (`str`, *optional*):
|
||||
Information necessary to initiate the text model. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -404,7 +404,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
|
||||
) -> PreTrainedModel:
|
||||
"""
|
||||
Params:
|
||||
vision_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
|
||||
vision_model_name_or_path (`str`, *optional*, defaults to `None`):
|
||||
Information necessary to initiate the vision model. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
@@ -417,7 +417,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
|
||||
loading path is slower than converting the PyTorch checkpoint in a Flax model using the provided
|
||||
conversion scripts and loading the Flax model afterwards.
|
||||
|
||||
text_model_name_or_path (:obj: *str*, *optional*):
|
||||
text_model_name_or_path (`str`, *optional*):
|
||||
Information necessary to initiate the text model. Can be either:
|
||||
|
||||
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
||||
|
||||
@@ -73,7 +73,7 @@ class Wav2Vec2Config(PretrainedConfig):
|
||||
feat_extract_activation (`str`, *optional*, defaults to `"gelu"`):
|
||||
The non-linear activation function (function or string) in the 1D convolutional layers of the feature
|
||||
extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
|
||||
feat_quantizer_dropout (obj:*float*, *optional*, defaults to 0.0):
|
||||
feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
|
||||
The dropout probability for quantized feature extractor states.
|
||||
conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
|
||||
A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
|
||||
|
||||
@@ -72,7 +72,7 @@ class WavLMConfig(PretrainedConfig):
|
||||
feat_extract_activation (`str`, *optional*, defaults to `"gelu"`):
|
||||
The non-linear activation function (function or string) in the 1D convolutional layers of the feature
|
||||
extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
|
||||
feat_quantizer_dropout (obj:*float*, *optional*, defaults to 0.0):
|
||||
feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
|
||||
The dropout probability for quantized feature extractor states.
|
||||
conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
|
||||
A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
|
||||
|
||||
@@ -512,15 +512,15 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
|
||||
curr_out = curr_out[: self.reuse_len]
|
||||
|
||||
if self.mem_len is None or self.mem_len == 0:
|
||||
# If :obj:`use_mems` is active but no `mem_len` is defined, the model behaves like GPT-2 at inference time
|
||||
# If `use_mems` is active but no `mem_len` is defined, the model behaves like GPT-2 at inference time
|
||||
# and returns all of the past and current hidden states.
|
||||
cutoff = 0
|
||||
else:
|
||||
# If :obj:`use_mems` is active and `mem_len` is defined, the model returns the last `mem_len` hidden
|
||||
# If `use_mems` is active and `mem_len` is defined, the model returns the last `mem_len` hidden
|
||||
# states. This is the preferred setting for training and long-form generation.
|
||||
cutoff = -self.mem_len
|
||||
if prev_mem is None:
|
||||
# if :obj:`use_mems` is active and `mem_len` is defined, the model
|
||||
# if `use_mems` is active and `mem_len` is defined, the model
|
||||
new_mem = curr_out[cutoff:]
|
||||
else:
|
||||
new_mem = tf.concat([prev_mem, curr_out], 0)[cutoff:]
|
||||
|
||||
@@ -1000,15 +1000,15 @@ class XLNetModel(XLNetPreTrainedModel):
|
||||
curr_out = curr_out[: self.reuse_len]
|
||||
|
||||
if self.mem_len is None or self.mem_len == 0:
|
||||
# If :obj:`use_mems` is active but no `mem_len` is defined, the model behaves like GPT-2 at inference time
|
||||
# If `use_mems` is active but no `mem_len` is defined, the model behaves like GPT-2 at inference time
|
||||
# and returns all of the past and current hidden states.
|
||||
cutoff = 0
|
||||
else:
|
||||
# If :obj:`use_mems` is active and `mem_len` is defined, the model returns the last `mem_len` hidden
|
||||
# If `use_mems` is active and `mem_len` is defined, the model returns the last `mem_len` hidden
|
||||
# states. This is the preferred setting for training and long-form generation.
|
||||
cutoff = -self.mem_len
|
||||
if prev_mem is None:
|
||||
# if :obj:`use_mems` is active and `mem_len` is defined, the model
|
||||
# if `use_mems` is active and `mem_len` is defined, the model
|
||||
new_mem = curr_out[cutoff:]
|
||||
else:
|
||||
new_mem = torch.cat([prev_mem, curr_out], dim=0)[cutoff:]
|
||||
|
||||
@@ -2466,7 +2466,7 @@ class Trainer:
|
||||
ignore_keys: Optional[List[str]] = None,
|
||||
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
|
||||
"""
|
||||
Perform an evaluation step on `model` using obj:*inputs*.
|
||||
Perform an evaluation step on `model` using `inputs`.
|
||||
|
||||
Subclass and override to inject custom behavior.
|
||||
|
||||
|
||||
@@ -226,8 +226,8 @@ def torch_distributed_zero_first(local_rank: int):
|
||||
|
||||
class DistributedSamplerWithLoop(DistributedSampler):
|
||||
"""
|
||||
Like a :obj:torch.utils.data.distributed.DistributedSampler` but loops at the end back to the beginning of the
|
||||
shuffled samples to make each process have a round multiple of batch_size samples.
|
||||
Like a `torch.utils.data.distributed.DistributedSampler` but loops at the end back to the beginning of the shuffled
|
||||
samples to make each process have a round multiple of batch_size samples.
|
||||
|
||||
Args:
|
||||
dataset (`torch.utils.data.Dataset`):
|
||||
|
||||
@@ -126,7 +126,7 @@ class Seq2SeqTrainer(Trainer):
|
||||
ignore_keys: Optional[List[str]] = None,
|
||||
) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
|
||||
"""
|
||||
Perform an evaluation step on `model` using obj:*inputs*.
|
||||
Perform an evaluation step on `model` using `inputs`.
|
||||
|
||||
Subclass and override to inject custom behavior.
|
||||
|
||||
|
||||
@@ -175,8 +175,8 @@ class TrainingArguments:
|
||||
logging_steps (`int`, *optional*, defaults to 500):
|
||||
Number of update steps between two logs if `logging_strategy="steps"`.
|
||||
logging_nan_inf_filter (`bool`, *optional*, defaults to `True`):
|
||||
Whether to filter `nan` and `inf` losses for logging. If set to obj:`True` the loss of every step that is
|
||||
`nan` or `inf` is filtered and the average loss of the current logging window is taken instead.
|
||||
Whether to filter `nan` and `inf` losses for logging. If set to `True` the loss of every step that is `nan`
|
||||
or `inf` is filtered and the average loss of the current logging window is taken instead.
|
||||
|
||||
<Tip>
|
||||
|
||||
|
||||
@@ -45,11 +45,11 @@ class TestCodeExamples(unittest.TestCase):
|
||||
the doctests in those files
|
||||
|
||||
Args:
|
||||
directory (:obj:`Path`): Directory containing the files
|
||||
identifier (:obj:`str`): Will parse files containing this
|
||||
ignore_files (:obj:`List[str]`): List of files to skip
|
||||
n_identifier (:obj:`str` or :obj:`List[str]`): Will not parse files containing this/these identifiers.
|
||||
only_modules (:obj:`bool`): Whether to only analyze modules
|
||||
directory (`Path`): Directory containing the files
|
||||
identifier (`str`): Will parse files containing this
|
||||
ignore_files (`List[str]`): List of files to skip
|
||||
n_identifier (`str` or `List[str]`): Will not parse files containing this/these identifiers.
|
||||
only_modules (`bool`): Whether to only analyze modules
|
||||
"""
|
||||
files = [file for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]
|
||||
|
||||
|
||||
@@ -556,7 +556,7 @@ class XLNetModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase)
|
||||
self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)
|
||||
|
||||
def test_xlnet_base_model_use_mems(self):
|
||||
# checking that in auto-regressive mode, :obj:`use_mems` gives the same results
|
||||
# checking that in auto-regressive mode, `use_mems` gives the same results
|
||||
self.model_tester.set_seed()
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_xlnet_model_use_mems(*config_and_inputs)
|
||||
|
||||
Reference in New Issue
Block a user