add distillation_configs
This commit is contained in:
@@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"activation": "gelu",
|
||||||
|
"attention_dropout": 0.1,
|
||||||
|
"dim": 768,
|
||||||
|
"dropout": 0.1,
|
||||||
|
"hidden_dim": 3072,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"max_position_embeddings": 512,
|
||||||
|
"n_heads": 12,
|
||||||
|
"n_layers": 6,
|
||||||
|
"sinusoidal_pos_embds": true,
|
||||||
|
"tie_weights_": true,
|
||||||
|
"vocab_size": 30522
|
||||||
|
}
|
||||||
|
|
||||||
10
examples/distillation/training_configs/distilgpt2.json
Normal file
10
examples/distillation/training_configs/distilgpt2.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 0.00001,
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_layer": 6,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"vocab_size": 50257
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user