[T5] allow config.decoder_layers to control decoder size (#7409)

* Working asymmetrical T5

* rename decoder_layers -> num_decoder_layers

* Fix docstring

* Allow creation of asymmetric t5 students
This commit is contained in:
Sam Shleifer
2020-09-28 03:08:04 -04:00
committed by GitHub
parent 7296fea1d6
commit 748425d47d
5 changed files with 58 additions and 10 deletions

View File

@@ -116,12 +116,14 @@ def create_student_by_copying_alternating_layers(
d = teacher_d
init_kwargs.update({"encoder_layers": e, "decoder_layers": d})
except AttributeError: # T5
teacher_e, teacher_d = teacher.config.num_layers, teacher.config.num_hidden_layers
assert e == d, "T5 Students must be symmetric"
init_kwargs["num_layers"] = e
# Kwargs to instantiate student = teacher kwargs with updated layer numbers + **extra_config_kwargs
teacher_e, teacher_d = teacher.config.num_layers, teacher.config.num_decoder_layers
if e is None:
e = teacher_e
if d is None:
d = teacher_d
init_kwargs.update({"num_layers": e, "num_decoder_layers": d})
# Kwargs to instantiate student: teacher kwargs with updated layer numbers + **extra_config_kwargs
init_kwargs.update(extra_config_kwargs)
# Copy weights