[seq2seq] MAX_LEN env var for MT commands (#5837)
This commit is contained in:
@@ -78,7 +78,7 @@ The following command should work on a 16GB GPU:
|
|||||||
--model_name_or_path facebook/bart-large
|
--model_name_or_path facebook/bart-large
|
||||||
```
|
```
|
||||||
|
|
||||||
*Note*: The following tips mostly apply to summarization finetuning.
|
|
||||||
|
|
||||||
### Translation Finetuning
|
### Translation Finetuning
|
||||||
|
|
||||||
@@ -87,6 +87,7 @@ Then you can finetune mbart_cc25 on english-romanian with the following command.
|
|||||||
**Recommendation:** Read and potentially modify the fairly opinionated defaults in the `train_mbart_cc25_enro.sh` script before running it.
|
**Recommendation:** Read and potentially modify the fairly opinionated defaults in the `train_mbart_cc25_enro.sh` script before running it.
|
||||||
```bash
|
```bash
|
||||||
export ENRO_DIR=${PWD}/wmt_en_ro # may need to be fixed depending on where you downloaded
|
export ENRO_DIR=${PWD}/wmt_en_ro # may need to be fixed depending on where you downloaded
|
||||||
|
export MAX_LEN=128
|
||||||
export BS=4
|
export BS=4
|
||||||
export GAS=8
|
export GAS=8
|
||||||
./train_mbart_cc25_enro.sh --output_dir cc25_v1_frozen/
|
./train_mbart_cc25_enro.sh --output_dir cc25_v1_frozen/
|
||||||
|
|||||||
@@ -8,11 +8,10 @@ python finetune.py \
|
|||||||
--do_train \
|
--do_train \
|
||||||
--do_predict \
|
--do_predict \
|
||||||
--val_check_interval 0.1 \
|
--val_check_interval 0.1 \
|
||||||
--n_val 500 \
|
|
||||||
--adam_eps 1e-06 \
|
--adam_eps 1e-06 \
|
||||||
--num_train_epochs 3 --src_lang en_XX --tgt_lang ro_RO \
|
--num_train_epochs 3 --src_lang en_XX --tgt_lang ro_RO \
|
||||||
--freeze_encoder --freeze_embeds --data_dir $ENRO_DIR \
|
--freeze_encoder --freeze_embeds --data_dir $ENRO_DIR \
|
||||||
--max_source_length=300 --max_target_length 300 --val_max_target_length=300 --test_max_target_length 300 \
|
--max_source_length $MAX_LEN --max_target_length $MAX_LEN --val_max_target_length $MAX_LEN --test_max_target_length $MAX_LEN \
|
||||||
--train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \
|
--train_batch_size=$BS --eval_batch_size=$BS --gradient_accumulation_steps=$GAS \
|
||||||
--model_name_or_path facebook/mbart-large-cc25 \
|
--model_name_or_path facebook/mbart-large-cc25 \
|
||||||
--task translation \
|
--task translation \
|
||||||
|
|||||||
18
examples/seq2seq/train_mbart_cc25_enro_multigpu.sh
Executable file
18
examples/seq2seq/train_mbart_cc25_enro_multigpu.sh
Executable file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
# Launches finetune.py for the translation task (en_XX -> ro_RO) on the number
# of GPUs given by N_GPUS, with the encoder and embeddings frozen.
#
# Required environment variables:
#   N_GPUS    value passed to --gpus
#   ENRO_DIR  value passed to --data_dir
#   MAX_LEN   value used for the source, target, val-target and test-target
#             maximum length flags
#   BS        value used for both the train and eval batch size
#   GAS       value passed to --gradient_accumulation_steps
#
# Any command-line arguments given to this script are forwarded to finetune.py
# unchanged.
# NOTE(review): no --model_name_or_path is set here; presumably it is supplied
# through the forwarded arguments -- confirm against finetune.py.

# Prepend the parent directory; only add the ':' separator when PYTHONPATH
# already has a value, so no empty entry is left at the end.
export PYTHONPATH="../${PYTHONPATH:+:${PYTHONPATH}}"

# Fail early with a clear message instead of letting an unset variable expand
# to nothing and shift the following flags.
: "${N_GPUS:?Need to export N_GPUS=}"
: "${ENRO_DIR:?Need to export ENRO_DIR=}"
: "${MAX_LEN:?Need to export MAX_LEN=}"
: "${BS:?Need to export BS=}"
: "${GAS:?Need to export GAS=}"

python finetune.py \
  --learning_rate=3e-5 \
  --fp16 \
  --gpus "$N_GPUS" \
  --do_train \
  --val_check_interval 0.25 \
  --adam_eps 1e-06 \
  --num_train_epochs 6 --src_lang en_XX --tgt_lang ro_RO \
  --data_dir "$ENRO_DIR" \
  --max_source_length "$MAX_LEN" --max_target_length "$MAX_LEN" \
  --val_max_target_length "$MAX_LEN" --test_max_target_length "$MAX_LEN" \
  --train_batch_size="$BS" --eval_batch_size="$BS" \
  --gradient_accumulation_steps="$GAS" \
  --tokenizer facebook/mbart-large-cc25 \
  --task translation \
  --warmup_steps 500 --freeze_encoder --freeze_embeds \
  "$@"
|
||||||
Reference in New Issue
Block a user