From 38084507c45c784dd5041058b8aa1676a633a18c Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Wed, 2 Oct 2019 11:00:46 -0400 Subject: [PATCH] add distillation_configs --- .../training_configs/distilbert-base-uncased.json | 15 +++++++++++++++ .../distillation/training_configs/distilgpt2.json | 10 ++++++++++ 2 files changed, 25 insertions(+) create mode 100644 examples/distillation/training_configs/distilbert-base-uncased.json create mode 100644 examples/distillation/training_configs/distilgpt2.json diff --git a/examples/distillation/training_configs/distilbert-base-uncased.json b/examples/distillation/training_configs/distilbert-base-uncased.json new file mode 100644 index 0000000000..15d1e7fe00 --- /dev/null +++ b/examples/distillation/training_configs/distilbert-base-uncased.json @@ -0,0 +1,15 @@ +{ + "activation": "gelu", + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "n_heads": 12, + "n_layers": 6, + "sinusoidal_pos_embds": true, + "tie_weights_": true, + "vocab_size": 30522 + } + \ No newline at end of file diff --git a/examples/distillation/training_configs/distilgpt2.json b/examples/distillation/training_configs/distilgpt2.json new file mode 100644 index 0000000000..8616e8e60f --- /dev/null +++ b/examples/distillation/training_configs/distilgpt2.json @@ -0,0 +1,10 @@ +{ + "initializer_range": 0.02, + "layer_norm_epsilon": 0.00001, + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_layer": 6, + "n_positions": 1024, + "vocab_size": 50257 +} \ No newline at end of file