Fix typo: "weigths" -> "weights"
This commit is contained in:
@@ -27,7 +27,7 @@ loss = outputs[0]
|
|||||||
# In transformers you can also have access to the logits:
|
# In transformers you can also have access to the logits:
|
||||||
loss, logits = outputs[:2]
|
loss, logits = outputs[:2]
|
||||||
|
|
||||||
# And even the attention weigths if you configure the model to output them (and other outputs too, see the docstrings and documentation)
|
# And even the attention weights if you configure the model to output them (and other outputs too, see the docstrings and documentation)
|
||||||
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True)
|
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True)
|
||||||
outputs = model(input_ids, labels=labels)
|
outputs = model(input_ids, labels=labels)
|
||||||
loss, logits, attentions = outputs
|
loss, logits, attentions = outputs
|
||||||
|
|||||||
@@ -136,7 +136,7 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path):
|
|||||||
if "kernel" in name or "proj" in name:
|
if "kernel" in name or "proj" in name:
|
||||||
array = np.transpose(array)
|
array = np.transpose(array)
|
||||||
if ("r_r_bias" in name or "r_w_bias" in name) and len(pointer) > 1:
|
if ("r_r_bias" in name or "r_w_bias" in name) and len(pointer) > 1:
|
||||||
# Here we will split the TF weigths
|
# Here we will split the TF weights
|
||||||
assert len(pointer) == array.shape[0]
|
assert len(pointer) == array.shape[0]
|
||||||
for i, p_i in enumerate(pointer):
|
for i, p_i in enumerate(pointer):
|
||||||
arr_i = array[i, ...]
|
arr_i = array[i, ...]
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
|
|||||||
logger.info("Transposing")
|
logger.info("Transposing")
|
||||||
array = np.transpose(array)
|
array = np.transpose(array)
|
||||||
if isinstance(pointer, list):
|
if isinstance(pointer, list):
|
||||||
# Here we will split the TF weigths
|
# Here we will split the TF weights
|
||||||
assert len(pointer) == array.shape[0]
|
assert len(pointer) == array.shape[0]
|
||||||
for i, p_i in enumerate(pointer):
|
for i, p_i in enumerate(pointer):
|
||||||
arr_i = array[i, ...]
|
arr_i = array[i, ...]
|
||||||
|
|||||||
@@ -59,4 +59,4 @@ You can then finish the addition step by adding imports for your classes in the
|
|||||||
- [ ] add a link to your conversion script in the main conversion utility (in `commands/convert.py`)
|
- [ ] add a link to your conversion script in the main conversion utility (in `commands/convert.py`)
|
||||||
- [ ] edit the PyTorch to TF 2.0 conversion script to add your model in the `convert_pytorch_checkpoint_to_tf2.py` file
|
- [ ] edit the PyTorch to TF 2.0 conversion script to add your model in the `convert_pytorch_checkpoint_to_tf2.py` file
|
||||||
- [ ] add a mention of your model in the doc: `README.md` and the documentation itself at `docs/source/pretrained_models.rst`.
|
- [ ] add a mention of your model in the doc: `README.md` and the documentation itself at `docs/source/pretrained_models.rst`.
|
||||||
- [ ] upload the pretrained weigths, configurations and vocabulary files.
|
- [ ] upload the pretrained weights, configurations and vocabulary files.
|
||||||
|
|||||||
Reference in New Issue
Block a user