[s2s] release pseudolabel links and instructions (#7639)

This commit is contained in:
Sam Shleifer
2020-10-07 11:20:44 -04:00
committed by GitHub
parent 08ba4b4902
commit e2bb9abb6a
2 changed files with 31 additions and 3 deletions

View File

@@ -13,7 +13,7 @@ logger = logging.get_logger(__name__)
def copy_layers(src_layers: nn.ModuleList, dest_layers: nn.ModuleList, layers_to_copy: List[int]) -> None:
layers_to_copy = nn.ModuleList([l for i, l in enumerate(src_layers) if i in layers_to_copy])
layers_to_copy = nn.ModuleList([src_layers[i] for i in layers_to_copy])
assert len(dest_layers) == len(layers_to_copy), f"{len(dest_layers)} != {len(layers_to_copy)}"
dest_layers.load_state_dict(layers_to_copy.state_dict())
@@ -81,6 +81,8 @@ def create_student_by_copying_alternating_layers(
e: Union[int, None] = None,
d: Union[int, None] = None,
copy_first_teacher_layers=False,
e_layers_to_copy=None,
d_layers_to_copy=None,
**extra_config_kwargs
) -> Tuple[PreTrainedModel, List[int], List[int]]:
"""Make a student by copying alternating layers from a teacher, save it to save_path.
@@ -142,8 +144,10 @@ def create_student_by_copying_alternating_layers(
return student, e_layers_to_copy, d_layers_to_copy
# Decide which layers of the teacher to copy. Not exactly alternating -- we try to keep first and last layer.
e_layers_to_copy: List[int] = pick_layers_to_copy(e, teacher_e)
d_layers_to_copy: List[int] = pick_layers_to_copy(d, teacher_d)
if e_layers_to_copy is None:
e_layers_to_copy: List[int] = pick_layers_to_copy(e, teacher_e)
if d_layers_to_copy is None:
d_layers_to_copy: List[int] = pick_layers_to_copy(d, teacher_d)
try:
copy_layers(teacher.model.encoder.layers, student.model.encoder.layers, e_layers_to_copy)