[Flax] Add links to google colabs (#12146)
* fix_torch_device_generate_test * remove @ * add colab links
This commit is contained in:
committed by
GitHub
parent
476ba679dd
commit
7566fefa69
@@ -58,5 +58,6 @@ The following table lists all of our examples on how to use 🤗 Transformers wi
|
|||||||
|
|
||||||
| Task | Example model | Example dataset | 🤗 Datasets | Colab
|
| Task | Example model | Example dataset | 🤗 Datasets | Colab
|
||||||
|---|---|---|:---:|:---:|
|
|---|---|---|:---:|:---:|
|
||||||
| [**`masked-language-modeling`**](https://github.com/huggingface/transformers/tree/master/examples/flax/language-modeling) | BERT | OSCAR | ✅ | []()
|
| [**`causal-language-modeling`**](https://github.com/huggingface/transformers/tree/master/examples/flax/language-modeling) | GPT2 | OSCAR | ✅ | [Open in Colab](https://colab.research.google.com/github/huggingface/notebooks/blob/master/examples/causal_language_modeling_flax.ipynb)
|
||||||
|
| [**`masked-language-modeling`**](https://github.com/huggingface/transformers/tree/master/examples/flax/language-modeling) | RoBERTa | OSCAR | ✅ | [Open in Colab](https://colab.research.google.com/github/huggingface/notebooks/blob/master/examples/masked_language_modeling_flax.ipynb)
|
||||||
| [**`text-classification`**](https://github.com/huggingface/transformers/tree/master/examples/flax/text-classification) | BERT | GLUE | ✅ | [Open in Colab](https://colab.research.google.com/github/huggingface/notebooks/blob/master/examples/text_classification_flax.ipynb)
|
| [**`text-classification`**](https://github.com/huggingface/transformers/tree/master/examples/flax/text-classification) | BERT | GLUE | ✅ | [Open in Colab](https://colab.research.google.com/github/huggingface/notebooks/blob/master/examples/text_classification_flax.ipynb)
|
||||||
|
|||||||
@@ -98,23 +98,23 @@ Next we can run the example script to pretrain the model:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
./run_mlm_flax.py \
|
./run_mlm_flax.py \
|
||||||
--output_dir="./runs" \
|
--output_dir="./runs" \
|
||||||
--model_type="roberta" \
|
--model_type="roberta" \
|
||||||
--config_name="${MODEL_DIR}" \
|
--config_name="${MODEL_DIR}" \
|
||||||
--tokenizer_name="${MODEL_DIR}" \
|
--tokenizer_name="${MODEL_DIR}" \
|
||||||
--dataset_name="oscar" \
|
--dataset_name="oscar" \
|
||||||
--dataset_config_name="unshuffled_deduplicated_no" \
|
--dataset_config_name="unshuffled_deduplicated_no" \
|
||||||
--max_seq_length="128" \
|
--max_seq_length="128" \
|
||||||
--weight_decay="0.01" \
|
--weight_decay="0.01" \
|
||||||
--per_device_train_batch_size="128" \
|
--per_device_train_batch_size="128" \
|
||||||
--per_device_eval_batch_size="128" \
|
--per_device_eval_batch_size="128" \
|
||||||
--learning_rate="3e-4" \
|
--learning_rate="3e-4" \
|
||||||
--warmup_steps="1000" \
|
--warmup_steps="1000" \
|
||||||
--overwrite_output_dir \
|
--overwrite_output_dir \
|
||||||
--pad_to_max_length \
|
--pad_to_max_length \
|
||||||
--num_train_epochs="18" \
|
--num_train_epochs="18" \
|
||||||
--adam_beta1="0.9" \
|
--adam_beta1="0.9" \
|
||||||
--adam_beta2="0.98"
|
--adam_beta2="0.98"
|
||||||
```
|
```
|
||||||
|
|
||||||
Training should converge at a loss and accuracy
|
Training should converge at a loss and accuracy
|
||||||
@@ -235,27 +235,27 @@ mkdir -p ${MODEL_DIR}
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3 xla_spawn.py --num_cores ${NUM_TPUS} run_mlm.py --output_dir="./runs" \
|
python3 xla_spawn.py --num_cores ${NUM_TPUS} run_mlm.py --output_dir="./runs" \
|
||||||
--model_type="roberta" \
|
--model_type="roberta" \
|
||||||
--config_name="${MODEL_DIR}" \
|
--config_name="${MODEL_DIR}" \
|
||||||
--tokenizer_name="${MODEL_DIR}" \
|
--tokenizer_name="${MODEL_DIR}" \
|
||||||
--dataset_name="oscar" \
|
--dataset_name="oscar" \
|
||||||
--dataset_config_name="unshuffled_deduplicated_no" \
|
--dataset_config_name="unshuffled_deduplicated_no" \
|
||||||
--max_seq_length="128" \
|
--max_seq_length="128" \
|
||||||
--weight_decay="0.01" \
|
--weight_decay="0.01" \
|
||||||
--per_device_train_batch_size="128" \
|
--per_device_train_batch_size="128" \
|
||||||
--per_device_eval_batch_size="128" \
|
--per_device_eval_batch_size="128" \
|
||||||
--learning_rate="3e-4" \
|
--learning_rate="3e-4" \
|
||||||
--warmup_steps="1000" \
|
--warmup_steps="1000" \
|
||||||
--overwrite_output_dir \
|
--overwrite_output_dir \
|
||||||
--num_train_epochs="18" \
|
--num_train_epochs="18" \
|
||||||
--adam_beta1="0.9" \
|
--adam_beta1="0.9" \
|
||||||
--adam_beta2="0.98" \
|
--adam_beta2="0.98" \
|
||||||
--do_train \
|
--do_train \
|
||||||
--do_eval \
|
--do_eval \
|
||||||
--logging_steps="500" \
|
--logging_steps="500" \
|
||||||
--evaluation_strategy="epoch" \
|
--evaluation_strategy="epoch" \
|
||||||
--report_to="tensorboard" \
|
--report_to="tensorboard" \
|
||||||
--save_strategy="no"
|
--save_strategy="no"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Script to compare pre-training with PyTorch on 8 V100 GPUs
|
### Script to compare pre-training with PyTorch on 8 V100 GPUs
|
||||||
@@ -281,27 +281,27 @@ mkdir -p ${MODEL_DIR}
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3 -m torch.distributed.launch --nproc_per_node ${NUM_GPUS} run_mlm.py \
|
python3 -m torch.distributed.launch --nproc_per_node ${NUM_GPUS} run_mlm.py \
|
||||||
--output_dir="./runs" \
|
--output_dir="./runs" \
|
||||||
--model_type="roberta" \
|
--model_type="roberta" \
|
||||||
--config_name="${MODEL_DIR}" \
|
--config_name="${MODEL_DIR}" \
|
||||||
--tokenizer_name="${MODEL_DIR}" \
|
--tokenizer_name="${MODEL_DIR}" \
|
||||||
--dataset_name="oscar" \
|
--dataset_name="oscar" \
|
||||||
--dataset_config_name="unshuffled_deduplicated_no" \
|
--dataset_config_name="unshuffled_deduplicated_no" \
|
||||||
--max_seq_length="128" \
|
--max_seq_length="128" \
|
||||||
--weight_decay="0.01" \
|
--weight_decay="0.01" \
|
||||||
--per_device_train_batch_size="32" \
|
--per_device_train_batch_size="32" \
|
||||||
--per_device_eval_batch_size="32" \
|
--per_device_eval_batch_size="32" \
|
||||||
--gradient_accumulation_steps="4" \
|
--gradient_accumulation_steps="4" \
|
||||||
--learning_rate="3e-4" \
|
--learning_rate="3e-4" \
|
||||||
--warmup_steps="1000" \
|
--warmup_steps="1000" \
|
||||||
--overwrite_output_dir \
|
--overwrite_output_dir \
|
||||||
--num_train_epochs="18" \
|
--num_train_epochs="18" \
|
||||||
--adam_beta1="0.9" \
|
--adam_beta1="0.9" \
|
||||||
--adam_beta2="0.98" \
|
--adam_beta2="0.98" \
|
||||||
--do_train \
|
--do_train \
|
||||||
--do_eval \
|
--do_eval \
|
||||||
--logging_steps="500" \
|
--logging_steps="500" \
|
||||||
--evaluation_strategy="steps" \
|
--evaluation_strategy="steps" \
|
||||||
--report_to="tensorboard" \
|
--report_to="tensorboard" \
|
||||||
--save_strategy="no"
|
--save_strategy="no"
|
||||||
```
|
```
|
||||||
|
|||||||
Reference in New Issue
Block a user