[Reformer] Add Enwiki8 Reformer Model - Adapt convert script (#4282)
* adapt convert script * update convert script * finish * fix marian pretrained docs
This commit is contained in:
committed by
GitHub
parent
336116d960
commit
ac7d5f67a2
@@ -296,9 +296,12 @@ For a list that includes community-uploaded models, refer to `https://huggingfac
|
|||||||
| | ``DialoGPT-large`` | | 36-layer, 1280-hidden, 20-heads, 774M parameters |
|
| | ``DialoGPT-large`` | | 36-layer, 1280-hidden, 20-heads, 774M parameters |
|
||||||
| | | | Trained on English text: 147M conversation-like exchanges extracted from Reddit. |
|
| | | | Trained on English text: 147M conversation-like exchanges extracted from Reddit. |
|
||||||
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||||
| Reformer | ``reformer-crime-and-punishment`` | | 6-layer, 256-hidden, 2-heads, 3M parameters |
|
| Reformer | ``reformer-enwik8`` | | 12-layer, 1024-hidden, 8-heads, 149M parameters |
|
||||||
| | | | Trained on English text: Crime and Punishment novel by Fyodor Dostoyevsky |
|
| | | | Trained on English Wikipedia data - enwik8. |
|
||||||
|
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||||
|
| | ``reformer-crime-and-punishment`` | | 6-layer, 256-hidden, 2-heads, 3M parameters |
|
||||||
|
| | | | Trained on English text: Crime and Punishment novel by Fyodor Dostoyevsky. |
|
||||||
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||||
| MarianMT | ``Helsinki-NLP/opus-mt-{src}-{tgt}`` | | 12-layer, 512-hidden, 8-heads, ~74M parameter Machine translation models. Parameter counts vary depending on vocab size. |
|
| MarianMT | ``Helsinki-NLP/opus-mt-{src}-{tgt}`` | | 12-layer, 512-hidden, 8-heads, ~74M parameter Machine translation models. Parameter counts vary depending on vocab size. |
|
||||||
| | | | (see `model list <https://huggingface.co/Helsinki-NLP>`_ |
|
| | | | (see `model list <https://huggingface.co/Helsinki-NLP>`_) |
|
||||||
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||||
|
|||||||
@@ -24,7 +24,8 @@ from .configuration_utils import PretrainedConfig
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||||
"google/reformer-crime-and-punishment": "https://cdn.huggingface.co/google/reformer-crime-and-punishment/config.json"
|
"google/reformer-crime-and-punishment": "https://cdn.huggingface.co/google/reformer-crime-and-punishment/config.json",
|
||||||
|
"google/reformer-enwik8": "https://cdn.huggingface.co/google/reformer-enwik8/config.json",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ def set_block_weights_in_torch(weights, torch_block, hidden_size):
|
|||||||
set_layer_weights_in_torch_local(attn_weights, torch_block.attention, hidden_size)
|
set_layer_weights_in_torch_local(attn_weights, torch_block.attention, hidden_size)
|
||||||
|
|
||||||
# intermediate weights
|
# intermediate weights
|
||||||
intermediate_weights = weights[2][0][2][2]
|
intermediate_weights = weights[2][0][1][2]
|
||||||
|
|
||||||
# Chunked Feed Forward
|
# Chunked Feed Forward
|
||||||
if len(intermediate_weights) == 4:
|
if len(intermediate_weights) == 4:
|
||||||
@@ -145,19 +145,16 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
|
|||||||
position_embeddings.weights[emb_idx] = torch.nn.Parameter(torch.tensor(emb_weights))
|
position_embeddings.weights[emb_idx] = torch.nn.Parameter(torch.tensor(emb_weights))
|
||||||
|
|
||||||
trax_layer_weights = weights[5]
|
trax_layer_weights = weights[5]
|
||||||
assert len(torch_model_reformer.encoder.layers) * 4 + 1 == len(
|
assert len(torch_model_reformer.encoder.layers) * 4 == len(
|
||||||
trax_layer_weights
|
trax_layer_weights
|
||||||
), "HF and trax model do not have the same number of layers"
|
), "HF and trax model do not have the same number of layers"
|
||||||
for layer_idx, layer in enumerate(torch_model_reformer.encoder.layers):
|
for layer_idx, layer in enumerate(torch_model_reformer.encoder.layers):
|
||||||
block_weights = trax_layer_weights[4 * layer_idx : 4 * (layer_idx + 1)]
|
block_weights = trax_layer_weights[4 * layer_idx : 4 * (layer_idx + 1)]
|
||||||
set_block_weights_in_torch(block_weights, layer, hidden_size)
|
set_block_weights_in_torch(block_weights, layer, hidden_size)
|
||||||
|
|
||||||
# output weights
|
|
||||||
out_weights = weights[6]
|
|
||||||
|
|
||||||
# output layer norm
|
# output layer norm
|
||||||
layer_norm_out_weight = np.asarray(out_weights[0][0])
|
layer_norm_out_weight = np.asarray(weights[7][0])
|
||||||
layer_norm_out_bias = np.asarray(out_weights[0][1])
|
layer_norm_out_bias = np.asarray(weights[7][1])
|
||||||
set_param(
|
set_param(
|
||||||
torch_model_reformer.encoder.layer_norm,
|
torch_model_reformer.encoder.layer_norm,
|
||||||
torch.tensor(layer_norm_out_weight),
|
torch.tensor(layer_norm_out_weight),
|
||||||
@@ -165,8 +162,8 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# output embeddings
|
# output embeddings
|
||||||
output_embed_weights = np.asarray(out_weights[2][0])
|
output_embed_weights = np.asarray(weights[9][0])
|
||||||
output_embed_bias = np.asarray(out_weights[2][1])
|
output_embed_bias = np.asarray(weights[9][1])
|
||||||
set_param(
|
set_param(
|
||||||
torch_model.lm_head.decoder,
|
torch_model.lm_head.decoder,
|
||||||
torch.tensor(output_embed_weights).transpose(0, 1).contiguous(),
|
torch.tensor(output_embed_weights).transpose(0, 1).contiguous(),
|
||||||
|
|||||||
@@ -36,7 +36,8 @@ from .modeling_utils import PreTrainedModel, apply_chunking_to_forward
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP = {
|
REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP = {
|
||||||
"google/reformer-crime-and-punishment": "https://cdn.huggingface.co/google/reformer-crime-and-punishment/pytorch_model.bin"
|
"google/reformer-crime-and-punishment": "https://cdn.huggingface.co/google/reformer-crime-and-punishment/pytorch_model.bin",
|
||||||
|
"google/reformer-enwik8": "https://cdn.huggingface.co/google/reformer-enwik8/pytorch_model.bin",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user