PegasusForConditionalGeneration (torch version) (#6340)
Co-authored-by: Jingqing Zhang <jingqing.zhang15@imperial.ac.uk>
This commit is contained in:
@@ -413,6 +413,18 @@ def get_layers_to_copy(n_to_get, tot):
|
||||
12: all_layers,
|
||||
}
|
||||
return layers_to_copy[n_to_get]
|
||||
elif tot == 16:
|
||||
layers_to_copy = { # maps num layers in student -> which teacher layers to copy
|
||||
1: [0],
|
||||
2: [0, 8],
|
||||
3: [0, 8, 15],
|
||||
4: [0, 5, 10, 15],
|
||||
6: [0, 3, 6, 9, 12, 15],
|
||||
8: [0, 2, 4, 6, 8, 10, 12, 15],
|
||||
9: [0, 1, 3, 5, 7, 9, 11, 13, 15],
|
||||
16: all_layers,
|
||||
}
|
||||
return layers_to_copy[n_to_get]
|
||||
else:
|
||||
return all_layers[:n_to_get] # TODO: better version on theseus-bart branch
|
||||
|
||||
|
||||
14
examples/seq2seq/finetune_pegasus_xsum.sh
Executable file
14
examples/seq2seq/finetune_pegasus_xsum.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Launches finetune.py with the hyperparameters below. Every extra argument
# given to this script is forwarded unchanged to finetune.py.
export PYTHONPATH="../":"${PYTHONPATH}"

# From appendix C of paper https://arxiv.org/abs/1912.08777
# Set --gradient_accumulation_steps so that effective batch size is 256 (2*128, 4*64, 8*32, 16*16).
# It is not set in this script, so pass it as an extra argument when invoking it.
python finetune.py \
    --learning_rate=1e-4 \
    --do_train \
    --do_predict \
    --n_val 1000 \
    --val_check_interval 0.25 \
    --max_source_length 512 --max_target_length 56 \
    --freeze_embeds --label_smoothing 0.1 \
    "$@"  # quoted so forwarded arguments containing spaces stay intact
|
||||
Reference in New Issue
Block a user