From d9e848c1d646a47e4cff50555f7bff3ad8e4033c Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Tue, 5 Jan 2021 07:05:32 -0800 Subject: [PATCH] add experimental warning (#9412) --- src/transformers/models/gpt2/modeling_gpt2.py | 2 ++ src/transformers/models/t5/modeling_t5.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index 867a02d361..cc2f55709f 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -480,6 +480,8 @@ GPT2_INPUTS_DOCSTRING = r""" Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. """ PARALLELIZE_DOCSTRING = r""" + This is an experimental feature and is subject to change at a moment's notice. + Uses a device map to distribute attention modules of the model across several devices. If no device map is given, it will evenly distribute blocks across all devices. diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index 00d9ca30ec..996c6e254c 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -179,6 +179,8 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path): # - PreTrainedModel for the models (it-self a sub-class of torch.nn.Module) #################################################### PARALLELIZE_DOCSTRING = r""" + This is an experimental feature and is subject to change at a moment's notice. + Uses a device map to distribute attention modules of the model across several devices. If no device map is given, it will evenly distribute blocks across all devices.