Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bae0c79f6f | ||
|
|
0d4c9808c4 |
2
setup.py
2
setup.py
@@ -282,7 +282,7 @@ install_requires = [
|
||||
|
||||
setup(
|
||||
name="transformers",
|
||||
version="4.3.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="4.3.3", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
|
||||
author_email="thomas@huggingface.co",
|
||||
description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
|
||||
# in the namespace without actually importing anything (and especially none of the backends).
|
||||
|
||||
__version__ = "4.3.2"
|
||||
__version__ = "4.3.3"
|
||||
|
||||
# Work around to update TensorFlow's absl.logging threshold which alters the
|
||||
# default Python logging output behavior when present.
|
||||
|
||||
@@ -56,7 +56,11 @@ def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_remove="")
|
||||
tf_name = tf_name[1:] # Remove level zero
|
||||
|
||||
# When should we transpose the weights
|
||||
transpose = bool(tf_name[-1] == "kernel" or "emb_projs" in tf_name or "out_projs" in tf_name)
|
||||
transpose = bool(
|
||||
tf_name[-1] in ["kernel", "pointwise_kernel", "depthwise_kernel"]
|
||||
or "emb_projs" in tf_name
|
||||
or "out_projs" in tf_name
|
||||
)
|
||||
|
||||
# Convert standard TF2.0 names in PyTorch names
|
||||
if tf_name[-1] == "kernel" or tf_name[-1] == "embeddings" or tf_name[-1] == "gamma":
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import argparse
|
||||
|
||||
from transformers import ConvBertConfig, ConvBertModel, load_tf_weights_in_convbert
|
||||
from transformers import ConvBertConfig, ConvBertModel, TFConvBertModel, load_tf_weights_in_convbert
|
||||
from transformers.utils import logging
|
||||
|
||||
|
||||
@@ -30,6 +30,9 @@ def convert_orig_tf1_checkpoint_to_pytorch(tf_checkpoint_path, convbert_config_f
|
||||
model = load_tf_weights_in_convbert(model, conf, tf_checkpoint_path)
|
||||
model.save_pretrained(pytorch_dump_path)
|
||||
|
||||
tf_model = TFConvBertModel.from_pretrained(pytorch_dump_path, from_pt=True)
|
||||
tf_model.save_pretrained(pytorch_dump_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
@@ -425,7 +425,7 @@ class GroupedLinearLayer(tf.keras.layers.Layer):
|
||||
def build(self, input_shape):
|
||||
self.kernel = self.add_weight(
|
||||
"kernel",
|
||||
shape=[self.num_groups, self.group_in_dim, self.group_out_dim],
|
||||
shape=[self.group_out_dim, self.group_in_dim, self.num_groups],
|
||||
initializer=self.kernel_initializer,
|
||||
trainable=True,
|
||||
)
|
||||
@@ -437,7 +437,7 @@ class GroupedLinearLayer(tf.keras.layers.Layer):
|
||||
def call(self, hidden_states):
|
||||
batch_size = shape_list(hidden_states)[0]
|
||||
x = tf.transpose(tf.reshape(hidden_states, [-1, self.num_groups, self.group_in_dim]), [1, 0, 2])
|
||||
x = tf.matmul(x, self.kernel)
|
||||
x = tf.matmul(x, tf.transpose(self.kernel, [2, 1, 0]))
|
||||
x = tf.transpose(x, [1, 0, 2])
|
||||
x = tf.reshape(x, [batch_size, -1, self.output_size])
|
||||
x = tf.nn.bias_add(value=x, bias=self.bias)
|
||||
|
||||
@@ -384,8 +384,6 @@ class TFConvBertModelIntegrationTest(unittest.TestCase):
|
||||
expected_shape = [1, 6, 768]
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
|
||||
print(output[:, :3, :3])
|
||||
|
||||
expected_slice = tf.constant(
|
||||
[
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user