PegasusForConditionalGeneration (torch version) (#6340)

Co-authored-by: Jingqing  Zhang <jingqing.zhang15@imperial.ac.uk>
This commit is contained in:
Sam Shleifer
2020-08-11 14:31:23 -04:00
committed by GitHub
parent f6cb0f806e
commit 66fa8ceaea
20 changed files with 860 additions and 20 deletions

View File

@@ -413,6 +413,18 @@ def get_layers_to_copy(n_to_get, tot):
12: all_layers,
}
return layers_to_copy[n_to_get]
elif tot == 16:
layers_to_copy = { # maps num layers in student -> which teacher layers to copy
1: [0],
2: [0, 8],
3: [0, 8, 15],
4: [0, 5, 10, 15],
6: [0, 3, 6, 9, 12, 15],
8: [0, 2, 4, 6, 8, 10, 12, 15],
9: [0, 1, 3, 5, 7, 9, 11, 13, 15],
16: all_layers,
}
return layers_to_copy[n_to_get]
else:
return all_layers[:n_to_get] # TODO: better version on theseus-bart branch

View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Launch finetune.py with training and prediction enabled, using the
# hyperparameters below. Any extra command-line arguments given to this
# script are forwarded unchanged to finetune.py.

# Make the parent directory importable by finetune.py.
export PYTHONPATH="../":"${PYTHONPATH}"
# From appendix C of paper https://arxiv.org/abs/1912.08777
# Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16)
# "$@" is quoted so that forwarded arguments containing spaces or glob
# characters reach finetune.py intact instead of being re-split by the shell.
python finetune.py \
    --learning_rate=1e-4 \
    --do_train \
    --do_predict \
    --n_val 1000 \
    --val_check_interval 0.25 \
    --max_source_length 512 --max_target_length 56 \
    --freeze_embeds --label_smoothing 0.1 \
    "$@"