add experimental warning (#9412)
This commit is contained in:
@@ -480,6 +480,8 @@ GPT2_INPUTS_DOCSTRING = r"""
|
|||||||
Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
|
Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
|
||||||
"""
|
"""
|
||||||
PARALLELIZE_DOCSTRING = r"""
|
PARALLELIZE_DOCSTRING = r"""
|
||||||
|
This is an experimental feature and is subject to change at a moment's notice.
|
||||||
|
|
||||||
Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
|
Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
|
||||||
it will evenly distribute blocks across all devices.
|
it will evenly distribute blocks across all devices.
|
||||||
|
|
||||||
|
|||||||
@@ -179,6 +179,8 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
|
|||||||
# - PreTrainedModel for the models (itself a subclass of torch.nn.Module)
|
# - PreTrainedModel for the models (itself a subclass of torch.nn.Module)
|
||||||
####################################################
|
####################################################
|
||||||
PARALLELIZE_DOCSTRING = r"""
|
PARALLELIZE_DOCSTRING = r"""
|
||||||
|
This is an experimental feature and is subject to change at a moment's notice.
|
||||||
|
|
||||||
Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
|
Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
|
||||||
it will evenly distribute blocks across all devices.
|
it will evenly distribute blocks across all devices.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user