[Longformer, Bert, Roberta, ...] Fix multi-GPU training (#7272)

* fix multi-gpu

* fix longformer

* force deletion of unnecessary layers

* fix notifications

* fix warning

* fix roberta

* fix tests

* remove hasattr

* fix tests

* fix roberta

* merge and clean authorized keys
This commit is contained in:
Patrick von Platen
2020-09-25 20:33:21 +02:00
committed by GitHub
parent 2c8ecdf8a8
commit e50a931c11
16 changed files with 179 additions and 49 deletions

View File

@@ -398,6 +398,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
config_class = None
base_model_prefix = ""
authorized_missing_keys = None
authorized_unexpected_keys = None
keys_to_never_save = None
@property
@@ -1013,6 +1014,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
for pat in cls.authorized_missing_keys:
missing_keys = [k for k in missing_keys if re.search(pat, k) is None]
if cls.authorized_unexpected_keys is not None:
for pat in cls.authorized_unexpected_keys:
unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None]
if len(unexpected_keys) > 0:
logger.warning(
f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when "