From 7566fefa6918ed5b0a5c01e9dc0fa945afe800bf Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 14 Jun 2021 11:00:29 +0100 Subject: [PATCH] [Flax] Add links to google colabs (#12146) * fix_torch_device_generate_test * remove @ * add colab links --- examples/flax/README.md | 3 +- examples/flax/language-modeling/README.md | 122 +++++++++++----------- 2 files changed, 63 insertions(+), 62 deletions(-) diff --git a/examples/flax/README.md b/examples/flax/README.md index 039bf9de18..06d36f9d73 100644 --- a/examples/flax/README.md +++ b/examples/flax/README.md @@ -58,5 +58,6 @@ The following table lists all of our examples on how to use 🤗 Transformers wi | Task | Example model | Example dataset | 🤗 Datasets | Colab |---|---|---|:---:|:---:| -| [**`masked-language-modeling`**](https://github.com/huggingface/transformers/tree/master/examples/flax/language-modeling) | BERT | OSCAR | ✅ | [![Open In Colab (TODO: Patrick)](https://colab.research.google.com/assets/colab-badge.svg)]() +| [**`causal-language-modeling`**](https://github.com/huggingface/transformers/tree/master/examples/flax/language-modeling) | GPT2 | OSCAR | ✅ | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/master/examples/causal_language_modeling_flax.ipynb) +| [**`masked-language-modeling`**](https://github.com/huggingface/transformers/tree/master/examples/flax/language-modeling) | RoBERTa | OSCAR | ✅ | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/master/examples/masked_language_modeling_flax.ipynb) | [**`text-classification`**](https://github.com/huggingface/transformers/tree/master/examples/flax/text-classification) | BERT | GLUE | ✅ | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/master/examples/text_classification_flax.ipynb) diff --git a/examples/flax/language-modeling/README.md b/examples/flax/language-modeling/README.md index 34d5cae140..cd0c499ffe 100644 --- a/examples/flax/language-modeling/README.md +++ b/examples/flax/language-modeling/README.md @@ -98,23 +98,23 @@ Next we can run the example script to pretrain the model: ```bash ./run_mlm_flax.py \ - --output_dir="./runs" \ - --model_type="roberta" \ - --config_name="${MODEL_DIR}" \ - --tokenizer_name="${MODEL_DIR}" \ - --dataset_name="oscar" \ - --dataset_config_name="unshuffled_deduplicated_no" \ - --max_seq_length="128" \ - --weight_decay="0.01" \ - --per_device_train_batch_size="128" \ - --per_device_eval_batch_size="128" \ - --learning_rate="3e-4" \ - --warmup_steps="1000" \ - --overwrite_output_dir \ - --pad_to_max_length \ - --num_train_epochs="18" \ - --adam_beta1="0.9" \ - --adam_beta2="0.98" + --output_dir="./runs" \ + --model_type="roberta" \ + --config_name="${MODEL_DIR}" \ + --tokenizer_name="${MODEL_DIR}" \ + --dataset_name="oscar" \ + --dataset_config_name="unshuffled_deduplicated_no" \ + --max_seq_length="128" \ + --weight_decay="0.01" \ + --per_device_train_batch_size="128" \ + --per_device_eval_batch_size="128" \ + --learning_rate="3e-4" \ + --warmup_steps="1000" \ + --overwrite_output_dir \ + --pad_to_max_length \ + --num_train_epochs="18" \ + --adam_beta1="0.9" \ + --adam_beta2="0.98" ``` Training should converge at a loss and accuracy @@ -235,27 +235,27 @@ mkdir -p ${MODEL_DIR} ```bash python3 xla_spawn.py --num_cores ${NUM_TPUS} run_mlm.py --output_dir="./runs" \ - --model_type="roberta" \ - --config_name="${MODEL_DIR}" \ - --tokenizer_name="${MODEL_DIR}" \ - --dataset_name="oscar" \ - --dataset_config_name="unshuffled_deduplicated_no" \ - --max_seq_length="128" \ - --weight_decay="0.01" \ - --per_device_train_batch_size="128" \ - --per_device_eval_batch_size="128" \ - --learning_rate="3e-4" \ - --warmup_steps="1000" \ - --overwrite_output_dir \ - --num_train_epochs="18" \ - --adam_beta1="0.9" \ - --adam_beta2="0.98" \ - --do_train \ - --do_eval \ - --logging_steps="500" \ - --evaluation_strategy="epoch" \ - --report_to="tensorboard" \ - --save_strategy="no" + --model_type="roberta" \ + --config_name="${MODEL_DIR}" \ + --tokenizer_name="${MODEL_DIR}" \ + --dataset_name="oscar" \ + --dataset_config_name="unshuffled_deduplicated_no" \ + --max_seq_length="128" \ + --weight_decay="0.01" \ + --per_device_train_batch_size="128" \ + --per_device_eval_batch_size="128" \ + --learning_rate="3e-4" \ + --warmup_steps="1000" \ + --overwrite_output_dir \ + --num_train_epochs="18" \ + --adam_beta1="0.9" \ + --adam_beta2="0.98" \ + --do_train \ + --do_eval \ + --logging_steps="500" \ + --evaluation_strategy="epoch" \ + --report_to="tensorboard" \ + --save_strategy="no" ``` ### Script to compare pre-training with PyTorch on 8 GPU V100's @@ -281,27 +281,27 @@ mkdir -p ${MODEL_DIR} ```bash python3 -m torch.distributed.launch --nproc_per_node ${NUM_GPUS} run_mlm.py \ - --output_dir="./runs" \ - --model_type="roberta" \ - --config_name="${MODEL_DIR}" \ - --tokenizer_name="${MODEL_DIR}" \ - --dataset_name="oscar" \ - --dataset_config_name="unshuffled_deduplicated_no" \ - --max_seq_length="128" \ - --weight_decay="0.01" \ - --per_device_train_batch_size="32" \ - --per_device_eval_batch_size="32" \ - --gradient_accumulation="4" \ - --learning_rate="3e-4" \ - --warmup_steps="1000" \ - --overwrite_output_dir \ - --num_train_epochs="18" \ - --adam_beta1="0.9" \ - --adam_beta2="0.98" \ - --do_train \ - --do_eval \ - --logging_steps="500" \ - --evaluation_strategy="steps" \ - --report_to="tensorboard" \ - --save_strategy="no" + --output_dir="./runs" \ + --model_type="roberta" \ + --config_name="${MODEL_DIR}" \ + --tokenizer_name="${MODEL_DIR}" \ + --dataset_name="oscar" \ + --dataset_config_name="unshuffled_deduplicated_no" \ + --max_seq_length="128" \ + --weight_decay="0.01" \ + --per_device_train_batch_size="32" \ + --per_device_eval_batch_size="32" \ + --gradient_accumulation="4" \ + --learning_rate="3e-4" \ + --warmup_steps="1000" \ + --overwrite_output_dir \ + --num_train_epochs="18" \ + --adam_beta1="0.9" \ + --adam_beta2="0.98" \ + --do_train \ + --do_eval \ + --logging_steps="500" \ + --evaluation_strategy="steps" \ + --report_to="tensorboard" \ + --save_strategy="no" ```