add experimental warning (#9412)

This commit is contained in:
Stas Bekman
2021-01-05 07:05:32 -08:00
committed by GitHub
parent 29acabd886
commit d9e848c1d6
2 changed files with 4 additions and 0 deletions

View File

@@ -480,6 +480,8 @@ GPT2_INPUTS_DOCSTRING = r"""
Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
""" """
PARALLELIZE_DOCSTRING = r""" PARALLELIZE_DOCSTRING = r"""
This is an experimental feature and is subject to change at a moment's notice.
Uses a device map to distribute attention modules of the model across several devices. If no device map is given, Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
it will evenly distribute blocks across all devices. it will evenly distribute blocks across all devices.

View File

@@ -179,6 +179,8 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
# - PreTrainedModel for the models (itself a sub-class of torch.nn.Module) # - PreTrainedModel for the models (itself a sub-class of torch.nn.Module)
#################################################### ####################################################
PARALLELIZE_DOCSTRING = r""" PARALLELIZE_DOCSTRING = r"""
This is an experimental feature and is subject to change at a moment's notice.
Uses a device map to distribute attention modules of the model across several devices. If no device map is given, Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
it will evenly distribute blocks across all devices. it will evenly distribute blocks across all devices.