From ba2400189b2242620868096ae49babf93bd9ce00 Mon Sep 17 00:00:00 2001 From: Sam Shleifer Date: Fri, 17 Jul 2020 22:51:31 -0400 Subject: [PATCH] [seq2seq] MAX_LEN env var for MT commands (#5837) --- examples/seq2seq/README.md | 3 ++- examples/seq2seq/train_mbart_cc25_enro.sh | 3 +-- .../seq2seq/train_mbart_cc25_enro_multigpu.sh | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) create mode 100755 examples/seq2seq/train_mbart_cc25_enro_multigpu.sh diff --git a/examples/seq2seq/README.md b/examples/seq2seq/README.md index fdf3e83617..f726c63d3b 100644 --- a/examples/seq2seq/README.md +++ b/examples/seq2seq/README.md @@ -78,7 +78,7 @@ The following command should work on a 16GB GPU: --model_name_or_path facebook/bart-large ``` -*Note*: The following tips mostly apply to summarization finetuning. + ### Translation Finetuning @@ -87,6 +87,7 @@ Then you can finetune mbart_cc25 on english-romanian with the following command. **Recommendation:** Read and potentially modify the fairly opinionated defaults in `train_mbart_cc25_enro.sh` script before running it. ```bash export ENRO_DIR=${PWD}/wmt_en_ro # may need to be fixed depending on where you downloaded +export MAX_LEN=128 export BS=4 export GAS=8 ./train_mbart_cc25_enro.sh --output_dir cc25_v1_frozen/ diff --git a/examples/seq2seq/train_mbart_cc25_enro.sh b/examples/seq2seq/train_mbart_cc25_enro.sh index 4dcbe9ec1b..bb65bd8fb4 100755 --- a/examples/seq2seq/train_mbart_cc25_enro.sh +++ b/examples/seq2seq/train_mbart_cc25_enro.sh @@ -8,11 +8,10 @@ python finetune.py \ --do_train \ --do_predict \ --val_check_interval 0.1 \ - --n_val 500 \ --adam_eps 1e-06 \ --num_train_epochs 3 --src_lang en_XX --tgt_lang ro_RO \ --freeze_encoder --freeze_embeds --data_dir $ENRO_DIR \ - --max_source_length=300 --max_target_length 300 --val_max_target_length=300 --test_max_target_length 300 \ + --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ --train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \ --model_name_or_path facebook/mbart-large-cc25 \ --task translation \ diff --git a/examples/seq2seq/train_mbart_cc25_enro_multigpu.sh b/examples/seq2seq/train_mbart_cc25_enro_multigpu.sh new file mode 100755 index 0000000000..4368089d6d --- /dev/null +++ b/examples/seq2seq/train_mbart_cc25_enro_multigpu.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +export PYTHONPATH="../":"${PYTHONPATH}" +# Need to export N_GPUS= +python finetune.py \ + --learning_rate=3e-5 \ + --fp16 \ + --gpus $N_GPUS \ + --do_train \ + --val_check_interval 0.25 \ + --adam_eps 1e-06 \ + --num_train_epochs 6 --src_lang en_XX --tgt_lang ro_RO \ + --data_dir $ENRO_DIR \ + --max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \ + --train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \ + --tokenizer facebook/mbart-large-cc25 \ + --task translation \ + --warmup_steps 500 --freeze_encoder --freeze_embeds \ + $@