test suite independent of framework
This commit is contained in:
@@ -10,7 +10,7 @@ jobs:
|
|||||||
- checkout
|
- checkout
|
||||||
- run: sudo pip install torch
|
- run: sudo pip install torch
|
||||||
- run: sudo pip install --progress-bar off .
|
- run: sudo pip install --progress-bar off .
|
||||||
- run: sudo pip install pytest codecov pytest-cov
|
- run: sudo pip install pytest==5.0.1 codecov pytest-cov
|
||||||
- run: sudo pip install tensorboardX scikit-learn
|
- run: sudo pip install tensorboardX scikit-learn
|
||||||
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
|
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
|
||||||
- run: python -m pytest -sv ./examples/
|
- run: python -m pytest -sv ./examples/
|
||||||
@@ -25,10 +25,9 @@ jobs:
|
|||||||
- checkout
|
- checkout
|
||||||
- run: sudo pip install tensorflow==2.0.0-rc0
|
- run: sudo pip install tensorflow==2.0.0-rc0
|
||||||
- run: sudo pip install --progress-bar off .
|
- run: sudo pip install --progress-bar off .
|
||||||
- run: sudo pip install pytest codecov pytest-cov
|
- run: sudo pip install pytest==5.0.1 codecov pytest-cov
|
||||||
- run: sudo pip install tensorboardX scikit-learn
|
- run: sudo pip install tensorboardX scikit-learn
|
||||||
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
|
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
|
||||||
- run: python -m pytest -sv ./examples/
|
|
||||||
- run: codecov
|
- run: codecov
|
||||||
build_py2_torch:
|
build_py2_torch:
|
||||||
working_directory: ~/pytorch-transformers
|
working_directory: ~/pytorch-transformers
|
||||||
@@ -40,7 +39,7 @@ jobs:
|
|||||||
- checkout
|
- checkout
|
||||||
- run: sudo pip install torch
|
- run: sudo pip install torch
|
||||||
- run: sudo pip install --progress-bar off .
|
- run: sudo pip install --progress-bar off .
|
||||||
- run: sudo pip install pytest codecov pytest-cov
|
- run: sudo pip install pytest==5.0.1 codecov pytest-cov
|
||||||
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
|
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
|
||||||
- run: codecov
|
- run: codecov
|
||||||
build_py2_tf:
|
build_py2_tf:
|
||||||
@@ -53,7 +52,7 @@ jobs:
|
|||||||
- checkout
|
- checkout
|
||||||
- run: sudo pip install tensorflow==2.0.0-rc0
|
- run: sudo pip install tensorflow==2.0.0-rc0
|
||||||
- run: sudo pip install --progress-bar off .
|
- run: sudo pip install --progress-bar off .
|
||||||
- run: sudo pip install pytest codecov pytest-cov
|
- run: sudo pip install pytest==5.0.1 codecov pytest-cov
|
||||||
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
|
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
|
||||||
- run: codecov
|
- run: codecov
|
||||||
deploy_doc:
|
deploy_doc:
|
||||||
|
|||||||
@@ -43,11 +43,11 @@ from .configuration_distilbert import DistilBertConfig, DISTILBERT_PRETRAINED_CO
|
|||||||
# Modeling
|
# Modeling
|
||||||
try:
|
try:
|
||||||
import torch
|
import torch
|
||||||
torch_available = True # pylint: disable=invalid-name
|
_torch_available = True # pylint: disable=invalid-name
|
||||||
except ImportError:
|
except ImportError:
|
||||||
torch_available = False # pylint: disable=invalid-name
|
_torch_available = False # pylint: disable=invalid-name
|
||||||
|
|
||||||
if torch_available:
|
if _torch_available:
|
||||||
logger.info("PyTorch version {} available.".format(torch.__version__))
|
logger.info("PyTorch version {} available.".format(torch.__version__))
|
||||||
|
|
||||||
from .modeling_utils import (PreTrainedModel, prune_layer, Conv1D)
|
from .modeling_utils import (PreTrainedModel, prune_layer, Conv1D)
|
||||||
@@ -87,19 +87,26 @@ if torch_available:
|
|||||||
# TensorFlow
|
# TensorFlow
|
||||||
try:
|
try:
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
tf_available = True # pylint: disable=invalid-name
|
assert int(tf.__version__[0]) >= 2
|
||||||
|
_tf_available = True # pylint: disable=invalid-name
|
||||||
except ImportError:
|
except ImportError:
|
||||||
tf_available = False # pylint: disable=invalid-name
|
_tf_available = False # pylint: disable=invalid-name
|
||||||
|
|
||||||
if tf_available:
|
if _tf_available:
|
||||||
logger.info("TensorFlow version {} available.".format(tf.__version__))
|
logger.info("TensorFlow version {} available.".format(tf.__version__))
|
||||||
|
|
||||||
from .modeling_tf_utils import TFPreTrainedModel
|
from .modeling_tf_utils import TFPreTrainedModel
|
||||||
from .modeling_tf_bert import (TFBertPreTrainedModel, TFBertModel, TFBertForPreTraining,
|
from .modeling_tf_bert import (TFBertPreTrainedModel, TFBertModel, TFBertForPreTraining,
|
||||||
TFBertForMaskedLM, TFBertForNextSentencePrediction, load_pt_weights_in_bert)
|
TFBertForMaskedLM, TFBertForNextSentencePrediction, load_bert_pt_weights_in_tf)
|
||||||
|
|
||||||
|
|
||||||
# Files and general utilities
|
# Files and general utilities
|
||||||
from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE,
|
from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE,
|
||||||
cached_path, add_start_docstrings, add_end_docstrings,
|
cached_path, add_start_docstrings, add_end_docstrings,
|
||||||
WEIGHTS_NAME, TF_WEIGHTS_NAME, CONFIG_NAME)
|
WEIGHTS_NAME, TF_WEIGHTS_NAME, CONFIG_NAME)
|
||||||
|
|
||||||
|
def is_torch_available():
|
||||||
|
return _torch_available
|
||||||
|
|
||||||
|
def is_tf_available():
|
||||||
|
return _tf_available
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""Convert BERT checkpoint."""
|
""" Convert pytorch checkpoints to TensorFlow """
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
@@ -21,19 +21,22 @@ from __future__ import print_function
|
|||||||
import argparse
|
import argparse
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from pytorch_transformers import BertConfig, TFBertForPreTraining, load_pt_weights_in_bert
|
from pytorch_transformers import BertConfig, TFBertForPreTraining, load_bert_pt_weights_in_tf
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
def convert_bert_checkpoint_to_tf(pytorch_checkpoint_path, bert_config_file, tf_dump_path):
|
def convert_pt_checkpoint_to_tf(model_type, pytorch_checkpoint_path, config_file, tf_dump_path):
|
||||||
|
if model_type == 'bert':
|
||||||
# Initialise TF model
|
# Initialise TF model
|
||||||
config = BertConfig.from_json_file(bert_config_file)
|
config = BertConfig.from_json_file(config_file)
|
||||||
print("Building TensorFlow model from configuration: {}".format(str(config)))
|
print("Building TensorFlow model from configuration: {}".format(str(config)))
|
||||||
model = TFBertForPreTraining(config)
|
model = TFBertForPreTraining(config)
|
||||||
|
|
||||||
# Load weights from tf checkpoint
|
# Load weights from tf checkpoint
|
||||||
model = load_pt_weights_in_bert(model, config, pytorch_checkpoint_path)
|
model = load_bert_pt_weights_in_tf(model, config, pytorch_checkpoint_path)
|
||||||
|
else:
|
||||||
|
raise ValueError("Unrecognized model type, should be one of ['bert'].")
|
||||||
|
|
||||||
# Save pytorch-model
|
# Save pytorch-model
|
||||||
print("Save TensorFlow model to {}".format(tf_dump_path))
|
print("Save TensorFlow model to {}".format(tf_dump_path))
|
||||||
@@ -43,16 +46,21 @@ def convert_bert_checkpoint_to_tf(pytorch_checkpoint_path, bert_config_file, tf_
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
## Required parameters
|
## Required parameters
|
||||||
|
parser.add_argument("--model_type",
|
||||||
|
default = None,
|
||||||
|
type = str,
|
||||||
|
required = True,
|
||||||
|
help = "Model type selcted in the list of.")
|
||||||
parser.add_argument("--pytorch_checkpoint_path",
|
parser.add_argument("--pytorch_checkpoint_path",
|
||||||
default = None,
|
default = None,
|
||||||
type = str,
|
type = str,
|
||||||
required = True,
|
required = True,
|
||||||
help = "Path to the PyTorch checkpoint path.")
|
help = "Path to the PyTorch checkpoint path.")
|
||||||
parser.add_argument("--bert_config_file",
|
parser.add_argument("--config_file",
|
||||||
default = None,
|
default = None,
|
||||||
type = str,
|
type = str,
|
||||||
required = True,
|
required = True,
|
||||||
help = "The config json file corresponding to the pre-trained BERT model. \n"
|
help = "The config json file corresponding to the pre-trained model. \n"
|
||||||
"This specifies the model architecture.")
|
"This specifies the model architecture.")
|
||||||
parser.add_argument("--tf_dump_path",
|
parser.add_argument("--tf_dump_path",
|
||||||
default = None,
|
default = None,
|
||||||
@@ -60,6 +68,7 @@ if __name__ == "__main__":
|
|||||||
required = True,
|
required = True,
|
||||||
help = "Path to the output Tensorflow dump file.")
|
help = "Path to the output Tensorflow dump file.")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
convert_bert_checkpoint_to_tf(args.pytorch_checkpoint_path,
|
convert_pt_checkpoint_to_tf(args.model_type.lower(),
|
||||||
args.bert_config_file,
|
args.pytorch_checkpoint_path,
|
||||||
|
args.config_file,
|
||||||
args.tf_dump_path)
|
args.tf_dump_path)
|
||||||
@@ -51,7 +51,7 @@ TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def load_pt_weights_in_bert(tf_model, config, pytorch_checkpoint_path):
|
def load_bert_pt_weights_in_tf(tf_model, config, pytorch_checkpoint_path):
|
||||||
""" Load pytorch checkpoints in a TF 2.0 model and save it using HDF5 format
|
""" Load pytorch checkpoints in a TF 2.0 model and save it using HDF5 format
|
||||||
We use HDF5 to easily do transfer learning
|
We use HDF5 to easily do transfer learning
|
||||||
(see https://github.com/tensorflow/tensorflow/blob/ee16fcac960ae660e0e4496658a366e2f745e1f0/tensorflow/python/keras/engine/network.py#L1352-L1357).
|
(see https://github.com/tensorflow/tensorflow/blob/ee16fcac960ae660e0e4496658a366e2f745e1f0/tensorflow/python/keras/engine/network.py#L1352-L1357).
|
||||||
@@ -150,6 +150,7 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
|
|||||||
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
|
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
|
||||||
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
input_ids, position_ids, token_type_ids = inputs
|
input_ids, position_ids, token_type_ids = inputs
|
||||||
|
|
||||||
@@ -194,6 +195,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
|
|||||||
x = tf.reshape(x, (batch_size, -1, self.num_attention_heads, self.attention_head_size))
|
x = tf.reshape(x, (batch_size, -1, self.num_attention_heads, self.attention_head_size))
|
||||||
return tf.transpose(x, perm=[0, 2, 1, 3])
|
return tf.transpose(x, perm=[0, 2, 1, 3])
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
hidden_states, attention_mask, head_mask = inputs
|
hidden_states, attention_mask, head_mask = inputs
|
||||||
|
|
||||||
@@ -242,6 +244,7 @@ class TFBertSelfOutput(tf.keras.layers.Layer):
|
|||||||
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
|
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
|
||||||
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
hidden_states, input_tensor = inputs
|
hidden_states, input_tensor = inputs
|
||||||
|
|
||||||
@@ -261,6 +264,7 @@ class TFBertAttention(tf.keras.layers.Layer):
|
|||||||
def prune_heads(self, heads):
|
def prune_heads(self, heads):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
input_tensor, attention_mask, head_mask = inputs
|
input_tensor, attention_mask, head_mask = inputs
|
||||||
|
|
||||||
@@ -279,6 +283,7 @@ class TFBertIntermediate(tf.keras.layers.Layer):
|
|||||||
else:
|
else:
|
||||||
self.intermediate_act_fn = config.hidden_act
|
self.intermediate_act_fn = config.hidden_act
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, hidden_states):
|
def call(self, hidden_states):
|
||||||
hidden_states = self.dense(hidden_states)
|
hidden_states = self.dense(hidden_states)
|
||||||
hidden_states = self.intermediate_act_fn(hidden_states)
|
hidden_states = self.intermediate_act_fn(hidden_states)
|
||||||
@@ -292,6 +297,7 @@ class TFBertOutput(tf.keras.layers.Layer):
|
|||||||
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
|
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
|
||||||
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
hidden_states, input_tensor = inputs
|
hidden_states, input_tensor = inputs
|
||||||
|
|
||||||
@@ -309,6 +315,7 @@ class TFBertLayer(tf.keras.layers.Layer):
|
|||||||
self.intermediate = TFBertIntermediate(config, name='intermediate')
|
self.intermediate = TFBertIntermediate(config, name='intermediate')
|
||||||
self.bert_output = TFBertOutput(config, name='output')
|
self.bert_output = TFBertOutput(config, name='output')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
hidden_states, attention_mask, head_mask = inputs
|
hidden_states, attention_mask, head_mask = inputs
|
||||||
|
|
||||||
@@ -327,6 +334,7 @@ class TFBertEncoder(tf.keras.layers.Layer):
|
|||||||
self.output_hidden_states = config.output_hidden_states
|
self.output_hidden_states = config.output_hidden_states
|
||||||
self.layer = [TFBertLayer(config, name='layer_{}'.format(i)) for i in range(config.num_hidden_layers)]
|
self.layer = [TFBertLayer(config, name='layer_{}'.format(i)) for i in range(config.num_hidden_layers)]
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
hidden_states, attention_mask, head_mask = inputs
|
hidden_states, attention_mask, head_mask = inputs
|
||||||
|
|
||||||
@@ -359,6 +367,7 @@ class TFBertPooler(tf.keras.layers.Layer):
|
|||||||
super(TFBertPooler, self).__init__(**kwargs)
|
super(TFBertPooler, self).__init__(**kwargs)
|
||||||
self.dense = tf.keras.layers.Dense(config.hidden_size, activation='tanh', name='dense')
|
self.dense = tf.keras.layers.Dense(config.hidden_size, activation='tanh', name='dense')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, hidden_states):
|
def call(self, hidden_states):
|
||||||
# We "pool" the model by simply taking the hidden state corresponding
|
# We "pool" the model by simply taking the hidden state corresponding
|
||||||
# to the first token.
|
# to the first token.
|
||||||
@@ -377,6 +386,7 @@ class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
|
|||||||
self.transform_act_fn = config.hidden_act
|
self.transform_act_fn = config.hidden_act
|
||||||
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
|
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, hidden_states):
|
def call(self, hidden_states):
|
||||||
hidden_states = self.dense(hidden_states)
|
hidden_states = self.dense(hidden_states)
|
||||||
hidden_states = self.transform_act_fn(hidden_states)
|
hidden_states = self.transform_act_fn(hidden_states)
|
||||||
@@ -400,6 +410,7 @@ class TFBertLMPredictionHead(tf.keras.layers.Layer):
|
|||||||
trainable=True,
|
trainable=True,
|
||||||
name='bias')
|
name='bias')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, hidden_states):
|
def call(self, hidden_states):
|
||||||
hidden_states = self.transform(hidden_states)
|
hidden_states = self.transform(hidden_states)
|
||||||
hidden_states = self.decoder(hidden_states) + self.bias
|
hidden_states = self.decoder(hidden_states) + self.bias
|
||||||
@@ -411,6 +422,7 @@ class TFBertMLMHead(tf.keras.layers.Layer):
|
|||||||
super(TFBertMLMHead, self).__init__(**kwargs)
|
super(TFBertMLMHead, self).__init__(**kwargs)
|
||||||
self.predictions = TFBertLMPredictionHead(config, name='predictions')
|
self.predictions = TFBertLMPredictionHead(config, name='predictions')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, sequence_output):
|
def call(self, sequence_output):
|
||||||
prediction_scores = self.predictions(sequence_output)
|
prediction_scores = self.predictions(sequence_output)
|
||||||
return prediction_scores
|
return prediction_scores
|
||||||
@@ -421,6 +433,7 @@ class TFBertNSPHead(tf.keras.layers.Layer):
|
|||||||
super(TFBertNSPHead, self).__init__(**kwargs)
|
super(TFBertNSPHead, self).__init__(**kwargs)
|
||||||
self.seq_relationship = tf.keras.layers.Dense(2, name='seq_relationship')
|
self.seq_relationship = tf.keras.layers.Dense(2, name='seq_relationship')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, pooled_output):
|
def call(self, pooled_output):
|
||||||
seq_relationship_score = self.seq_relationship(pooled_output)
|
seq_relationship_score = self.seq_relationship(pooled_output)
|
||||||
return seq_relationship_score
|
return seq_relationship_score
|
||||||
@@ -447,6 +460,7 @@ class TFBertMainLayer(tf.keras.layers.Layer):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
if not isinstance(inputs, (dict, tuple, list)):
|
if not isinstance(inputs, (dict, tuple, list)):
|
||||||
input_ids = inputs
|
input_ids = inputs
|
||||||
@@ -459,12 +473,12 @@ class TFBertMainLayer(tf.keras.layers.Layer):
|
|||||||
head_mask = inputs[4] if len(inputs) > 4 else None
|
head_mask = inputs[4] if len(inputs) > 4 else None
|
||||||
assert len(inputs) <= 5, "Too many inputs."
|
assert len(inputs) <= 5, "Too many inputs."
|
||||||
else:
|
else:
|
||||||
input_ids = inputs.pop('input_ids')
|
input_ids = inputs.get('input_ids')
|
||||||
attention_mask = inputs.pop('attention_mask', None)
|
attention_mask = inputs.get('attention_mask', None)
|
||||||
token_type_ids = inputs.pop('token_type_ids', None)
|
token_type_ids = inputs.get('token_type_ids', None)
|
||||||
position_ids = inputs.pop('position_ids', None)
|
position_ids = inputs.get('position_ids', None)
|
||||||
head_mask = inputs.pop('head_mask', None)
|
head_mask = inputs.get('head_mask', None)
|
||||||
assert len(inputs) == 0, "Unexpected inputs detected: {}. Check inputs dict key names.".format(list(inputs.keys()))
|
assert len(inputs) <= 5, "Too many inputs."
|
||||||
|
|
||||||
if attention_mask is None:
|
if attention_mask is None:
|
||||||
attention_mask = tf.fill(tf.shape(input_ids), 1)
|
attention_mask = tf.fill(tf.shape(input_ids), 1)
|
||||||
@@ -507,23 +521,16 @@ class TFBertMainLayer(tf.keras.layers.Layer):
|
|||||||
outputs = (sequence_output, pooled_output,) + encoder_outputs[1:] # add hidden_states and attentions if they are here
|
outputs = (sequence_output, pooled_output,) + encoder_outputs[1:] # add hidden_states and attentions if they are here
|
||||||
return outputs # sequence_output, pooled_output, (hidden_states), (attentions)
|
return outputs # sequence_output, pooled_output, (hidden_states), (attentions)
|
||||||
|
|
||||||
|
|
||||||
class TFBertPreTrainedModel(TFPreTrainedModel):
|
class TFBertPreTrainedModel(TFPreTrainedModel):
|
||||||
""" An abstract class to handle weights initialization and
|
""" An abstract class to handle weights initialization and
|
||||||
a simple interface for dowloading and loading pretrained models.
|
a simple interface for dowloading and loading pretrained models.
|
||||||
"""
|
"""
|
||||||
config_class = BertConfig
|
config_class = BertConfig
|
||||||
pretrained_model_archive_map = TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
pretrained_model_archive_map = TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
load_pt_weights = load_pt_weights_in_bert
|
load_pt_weights = load_bert_pt_weights_in_tf
|
||||||
base_model_prefix = "bert"
|
base_model_prefix = "bert"
|
||||||
|
|
||||||
def __init__(self, *inputs, **kwargs):
|
|
||||||
super(TFBertPreTrainedModel, self).__init__(*inputs, **kwargs)
|
|
||||||
|
|
||||||
def init_weights(self, module):
|
|
||||||
""" Initialize the weights.
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
BERT_START_DOCSTRING = r""" The BERT model was proposed in
|
BERT_START_DOCSTRING = r""" The BERT model was proposed in
|
||||||
`BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding`_
|
`BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding`_
|
||||||
@@ -635,6 +642,7 @@ class TFBertModel(TFBertPreTrainedModel):
|
|||||||
super(TFBertModel, self).__init__(config)
|
super(TFBertModel, self).__init__(config)
|
||||||
self.bert = TFBertMainLayer(config, name='bert')
|
self.bert = TFBertMainLayer(config, name='bert')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
outputs = self.bert(inputs, training=training)
|
outputs = self.bert(inputs, training=training)
|
||||||
return outputs
|
return outputs
|
||||||
@@ -687,7 +695,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
|
|||||||
self.cls_mlm = TFBertMLMHead(config, name='cls_mlm')
|
self.cls_mlm = TFBertMLMHead(config, name='cls_mlm')
|
||||||
self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
|
self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
|
||||||
|
|
||||||
# self.apply(self.init_weights) # TODO check added weights initialization
|
|
||||||
self.tie_weights()
|
self.tie_weights()
|
||||||
|
|
||||||
def tie_weights(self):
|
def tie_weights(self):
|
||||||
@@ -695,6 +702,7 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
|
|||||||
"""
|
"""
|
||||||
pass # TODO add weights tying
|
pass # TODO add weights tying
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
outputs = self.bert(inputs, training=training)
|
outputs = self.bert(inputs, training=training)
|
||||||
|
|
||||||
@@ -704,14 +712,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
|
|||||||
|
|
||||||
outputs = (prediction_scores, seq_relationship_score,) + outputs[2:] # add hidden states and attention if they are here
|
outputs = (prediction_scores, seq_relationship_score,) + outputs[2:] # add hidden states and attention if they are here
|
||||||
|
|
||||||
# if masked_lm_labels is not None and next_sentence_label is not None:
|
|
||||||
# loss_fct = CrossEntropyLoss(ignore_index=-1)
|
|
||||||
# masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
|
|
||||||
# next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
|
|
||||||
# total_loss = masked_lm_loss + next_sentence_loss
|
|
||||||
# outputs = (total_loss,) + outputs
|
|
||||||
# TODO add example with losses using model.compile and a dictionary of losses (give names to the output layers)
|
|
||||||
|
|
||||||
return outputs # prediction_scores, seq_relationship_score, (hidden_states), (attentions)
|
return outputs # prediction_scores, seq_relationship_score, (hidden_states), (attentions)
|
||||||
|
|
||||||
|
|
||||||
@@ -753,7 +753,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
|
|||||||
self.bert = TFBertMainLayer(config, name='bert')
|
self.bert = TFBertMainLayer(config, name='bert')
|
||||||
self.cls_mlm = TFBertMLMHead(config, name='cls_mlm')
|
self.cls_mlm = TFBertMLMHead(config, name='cls_mlm')
|
||||||
|
|
||||||
# self.apply(self.init_weights)
|
|
||||||
self.tie_weights()
|
self.tie_weights()
|
||||||
|
|
||||||
def tie_weights(self):
|
def tie_weights(self):
|
||||||
@@ -761,6 +760,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
|
|||||||
"""
|
"""
|
||||||
pass # TODO add weights tying
|
pass # TODO add weights tying
|
||||||
|
|
||||||
|
@tf.function
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
outputs = self.bert(inputs, training=training)
|
outputs = self.bert(inputs, training=training)
|
||||||
|
|
||||||
@@ -768,11 +768,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
|
|||||||
prediction_scores = self.cls_mlm(sequence_output)
|
prediction_scores = self.cls_mlm(sequence_output)
|
||||||
|
|
||||||
outputs = (prediction_scores,) + outputs[2:] # Add hidden states and attention if they are here
|
outputs = (prediction_scores,) + outputs[2:] # Add hidden states and attention if they are here
|
||||||
# if masked_lm_labels is not None:
|
|
||||||
# loss_fct = CrossEntropyLoss(ignore_index=-1)
|
|
||||||
# masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
|
|
||||||
# outputs = (masked_lm_loss,) + outputs
|
|
||||||
# TODO example with losses
|
|
||||||
|
|
||||||
return outputs # prediction_scores, (hidden_states), (attentions)
|
return outputs # prediction_scores, (hidden_states), (attentions)
|
||||||
|
|
||||||
@@ -815,8 +810,7 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
|
|||||||
self.bert = TFBertMainLayer(config, name='bert')
|
self.bert = TFBertMainLayer(config, name='bert')
|
||||||
self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
|
self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
|
||||||
|
|
||||||
# self.apply(self.init_weights)
|
@tf.function
|
||||||
|
|
||||||
def call(self, inputs, training=False):
|
def call(self, inputs, training=False):
|
||||||
outputs = self.bert(inputs, training=training)
|
outputs = self.bert(inputs, training=training)
|
||||||
|
|
||||||
@@ -824,9 +818,299 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
|
|||||||
seq_relationship_score = self.cls_nsp(pooled_output)
|
seq_relationship_score = self.cls_nsp(pooled_output)
|
||||||
|
|
||||||
outputs = (seq_relationship_score,) + outputs[2:] # add hidden states and attention if they are here
|
outputs = (seq_relationship_score,) + outputs[2:] # add hidden states and attention if they are here
|
||||||
# if next_sentence_label is not None:
|
|
||||||
# loss_fct = CrossEntropyLoss(ignore_index=-1)
|
|
||||||
# next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
|
|
||||||
# outputs = (next_sentence_loss,) + outputs
|
|
||||||
|
|
||||||
return outputs # seq_relationship_score, (hidden_states), (attentions)
|
return outputs # seq_relationship_score, (hidden_states), (attentions)
|
||||||
|
|
||||||
|
|
||||||
|
@add_start_docstrings("""Bert Model transformer with a sequence classification/regression head on top (a linear layer on top of
|
||||||
|
the pooled output) e.g. for GLUE tasks. """,
|
||||||
|
BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING)
|
||||||
|
class TFBertForSequenceClassification(TFBertPreTrainedModel):
|
||||||
|
r"""
|
||||||
|
**labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
|
||||||
|
Labels for computing the sequence classification/regression loss.
|
||||||
|
Indices should be in ``[0, ..., config.num_labels - 1]``.
|
||||||
|
If ``config.num_labels == 1`` a regression loss is computed (Mean-Square loss),
|
||||||
|
If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy).
|
||||||
|
|
||||||
|
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||||
|
**loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
||||||
|
Classification (or regression if config.num_labels==1) loss.
|
||||||
|
**logits**: ``torch.FloatTensor`` of shape ``(batch_size, config.num_labels)``
|
||||||
|
Classification (or regression if config.num_labels==1) scores (before SoftMax).
|
||||||
|
**hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
|
||||||
|
list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
|
||||||
|
of shape ``(batch_size, sequence_length, hidden_size)``:
|
||||||
|
Hidden-states of the model at the output of each layer plus the initial embedding outputs.
|
||||||
|
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
||||||
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
|
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
|
||||||
|
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||||
|
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
|
||||||
|
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
|
||||||
|
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||||
|
outputs = model(input_ids, labels=labels)
|
||||||
|
loss, logits = outputs[:2]
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, config):
|
||||||
|
super(TFBertForSequenceClassification, self).__init__(config)
|
||||||
|
self.num_labels = config.num_labels
|
||||||
|
|
||||||
|
self.bert = TFBertMainLayer(config, name='bert')
|
||||||
|
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
||||||
|
self.classifier = tf.keras.layers.Dense(config.num_labels, name='classifier')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
|
def call(self, inputs, training=False):
|
||||||
|
outputs = self.bert(inputs, training=training)
|
||||||
|
|
||||||
|
pooled_output = outputs[1]
|
||||||
|
|
||||||
|
pooled_output = self.dropout(pooled_output)
|
||||||
|
logits = self.classifier(pooled_output)
|
||||||
|
|
||||||
|
outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here
|
||||||
|
|
||||||
|
return outputs # logits, (hidden_states), (attentions)
|
||||||
|
|
||||||
|
|
||||||
|
@add_start_docstrings("""Bert Model with a multiple choice classification head on top (a linear layer on top of
|
||||||
|
the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """,
|
||||||
|
BERT_START_DOCSTRING)
|
||||||
|
class TFBertForMultipleChoice(TFBertPreTrainedModel):
|
||||||
|
r"""
|
||||||
|
Inputs:
|
||||||
|
**input_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices, sequence_length)``:
|
||||||
|
Indices of input sequence tokens in the vocabulary.
|
||||||
|
The second dimension of the input (`num_choices`) indicates the number of choices to score.
|
||||||
|
To match pre-training, BERT input sequence should be formatted with [CLS] and [SEP] tokens as follows:
|
||||||
|
|
||||||
|
(a) For sequence pairs:
|
||||||
|
|
||||||
|
``tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]``
|
||||||
|
|
||||||
|
``token_type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1``
|
||||||
|
|
||||||
|
(b) For single sequences:
|
||||||
|
|
||||||
|
``tokens: [CLS] the dog is hairy . [SEP]``
|
||||||
|
|
||||||
|
``token_type_ids: 0 0 0 0 0 0 0``
|
||||||
|
|
||||||
|
Indices can be obtained using :class:`pytorch_transformers.BertTokenizer`.
|
||||||
|
See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and
|
||||||
|
:func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details.
|
||||||
|
**token_type_ids**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, num_choices, sequence_length)``:
|
||||||
|
Segment token indices to indicate first and second portions of the inputs.
|
||||||
|
The second dimension of the input (`num_choices`) indicates the number of choices to score.
|
||||||
|
Indices are selected in ``[0, 1]``: ``0`` corresponds to a `sentence A` token, ``1``
|
||||||
|
corresponds to a `sentence B` token
|
||||||
|
(see `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding`_ for more details).
|
||||||
|
**attention_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(batch_size, num_choices, sequence_length)``:
|
||||||
|
Mask to avoid performing attention on padding token indices.
|
||||||
|
The second dimension of the input (`num_choices`) indicates the number of choices to score.
|
||||||
|
Mask values selected in ``[0, 1]``:
|
||||||
|
``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
|
||||||
|
**head_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(num_heads,)`` or ``(num_layers, num_heads)``:
|
||||||
|
Mask to nullify selected heads of the self-attention modules.
|
||||||
|
Mask values selected in ``[0, 1]``:
|
||||||
|
``1`` indicates the head is **not masked**, ``0`` indicates the head is **masked**.
|
||||||
|
**labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
|
||||||
|
Labels for computing the multiple choice classification loss.
|
||||||
|
Indices should be in ``[0, ..., num_choices]`` where `num_choices` is the size of the second dimension
|
||||||
|
of the input tensors. (see `input_ids` above)
|
||||||
|
|
||||||
|
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||||
|
**loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
||||||
|
Classification loss.
|
||||||
|
**classification_scores**: ``torch.FloatTensor`` of shape ``(batch_size, num_choices)`` where `num_choices` is the size of the second dimension
|
||||||
|
of the input tensors. (see `input_ids` above).
|
||||||
|
Classification scores (before SoftMax).
|
||||||
|
**hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
|
||||||
|
list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
|
||||||
|
of shape ``(batch_size, sequence_length, hidden_size)``:
|
||||||
|
Hidden-states of the model at the output of each layer plus the initial embedding outputs.
|
||||||
|
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
||||||
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
|
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
|
||||||
|
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||||
|
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
|
||||||
|
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
|
||||||
|
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
|
||||||
|
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
|
||||||
|
outputs = model(input_ids, labels=labels)
|
||||||
|
loss, classification_scores = outputs[:2]
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, config):
|
||||||
|
super(TFBertForMultipleChoice, self).__init__(config)
|
||||||
|
|
||||||
|
self.bert = TFBertMainLayer(config, name='bert')
|
||||||
|
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
||||||
|
self.classifier = tf.keras.layers.Dense(1, name='classifier')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
|
def call(self, inputs, training=False):
|
||||||
|
if not isinstance(inputs, (dict, tuple, list)):
|
||||||
|
input_ids = inputs
|
||||||
|
attention_mask, head_mask, position_ids, token_type_ids = None, None, None, None
|
||||||
|
elif isinstance(inputs, (tuple, list)):
|
||||||
|
input_ids = inputs[0]
|
||||||
|
attention_mask = inputs[1] if len(inputs) > 1 else None
|
||||||
|
token_type_ids = inputs[2] if len(inputs) > 2 else None
|
||||||
|
position_ids = inputs[3] if len(inputs) > 3 else None
|
||||||
|
head_mask = inputs[4] if len(inputs) > 4 else None
|
||||||
|
assert len(inputs) <= 5, "Too many inputs."
|
||||||
|
else:
|
||||||
|
input_ids = inputs.get('input_ids')
|
||||||
|
attention_mask = inputs.get('attention_mask', None)
|
||||||
|
token_type_ids = inputs.get('token_type_ids', None)
|
||||||
|
position_ids = inputs.get('position_ids', None)
|
||||||
|
head_mask = inputs.get('head_mask', None)
|
||||||
|
assert len(inputs) <= 5, "Too many inputs."
|
||||||
|
|
||||||
|
num_choices = tf.shape(input_ids)[1]
|
||||||
|
seq_length = tf.shape(input_ids)[2]
|
||||||
|
|
||||||
|
flat_input_ids = tf.reshape(input_ids, (-1, seq_length))
|
||||||
|
flat_attention_mask = tf.reshape(attention_mask, (-1, seq_length)) if attention_mask is not None else None
|
||||||
|
flat_token_type_ids = tf.reshape(token_type_ids, (-1, seq_length)) if token_type_ids is not None else None
|
||||||
|
flat_position_ids = tf.reshape(position_ids, (-1, seq_length)) if position_ids is not None else None
|
||||||
|
|
||||||
|
flat_inputs = [flat_input_ids, flat_attention_mask, flat_token_type_ids, flat_position_ids, head_mask]
|
||||||
|
|
||||||
|
outputs = self.bert(flat_inputs, training=training)
|
||||||
|
|
||||||
|
pooled_output = outputs[1]
|
||||||
|
|
||||||
|
pooled_output = self.dropout(pooled_output)
|
||||||
|
logits = self.classifier(pooled_output)
|
||||||
|
reshaped_logits = tf.reshape(logits, (-1, num_choices))
|
||||||
|
|
||||||
|
outputs = (reshaped_logits,) + outputs[2:] # add hidden states and attention if they are here
|
||||||
|
|
||||||
|
return outputs # reshaped_logits, (hidden_states), (attentions)
|
||||||
|
|
||||||
|
|
||||||
|
@add_start_docstrings("""Bert Model with a token classification head on top (a linear layer on top of
|
||||||
|
the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """,
|
||||||
|
BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING)
|
||||||
|
class TFBertForTokenClassification(TFBertPreTrainedModel):
|
||||||
|
r"""
|
||||||
|
**labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
||||||
|
Labels for computing the token classification loss.
|
||||||
|
Indices should be in ``[0, ..., config.num_labels - 1]``.
|
||||||
|
|
||||||
|
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||||
|
**loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
||||||
|
Classification loss.
|
||||||
|
**scores**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, config.num_labels)``
|
||||||
|
Classification scores (before SoftMax).
|
||||||
|
**hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
|
||||||
|
list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
|
||||||
|
of shape ``(batch_size, sequence_length, hidden_size)``:
|
||||||
|
Hidden-states of the model at the output of each layer plus the initial embedding outputs.
|
||||||
|
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
||||||
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
|
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
|
||||||
|
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||||
|
model = BertForTokenClassification.from_pretrained('bert-base-uncased')
|
||||||
|
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
|
||||||
|
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
|
||||||
|
outputs = model(input_ids, labels=labels)
|
||||||
|
loss, scores = outputs[:2]
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, config):
|
||||||
|
super(TFBertForTokenClassification, self).__init__(config)
|
||||||
|
self.num_labels = config.num_labels
|
||||||
|
|
||||||
|
self.bert = TFBertMainLayer(config, name='bert')
|
||||||
|
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
||||||
|
self.classifier = tf.keras.layers.Dense(config.num_labels, name='classifier')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
|
def call(self, inputs, training=False):
|
||||||
|
outputs = self.bert(inputs, training=training)
|
||||||
|
|
||||||
|
sequence_output = outputs[0]
|
||||||
|
|
||||||
|
sequence_output = self.dropout(sequence_output)
|
||||||
|
logits = self.classifier(sequence_output)
|
||||||
|
|
||||||
|
outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here
|
||||||
|
|
||||||
|
return outputs # scores, (hidden_states), (attentions)
|
||||||
|
|
||||||
|
|
||||||
|
@add_start_docstrings("""Bert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
|
||||||
|
the hidden-states output to compute `span start logits` and `span end logits`). """,
|
||||||
|
BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING)
|
||||||
|
class TFBertForQuestionAnswering(TFBertPreTrainedModel):
|
||||||
|
r"""
|
||||||
|
**start_positions**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
|
||||||
|
Labels for position (index) of the start of the labelled span for computing the token classification loss.
|
||||||
|
Positions are clamped to the length of the sequence (`sequence_length`).
|
||||||
|
Position outside of the sequence are not taken into account for computing the loss.
|
||||||
|
**end_positions**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
|
||||||
|
Labels for position (index) of the end of the labelled span for computing the token classification loss.
|
||||||
|
Positions are clamped to the length of the sequence (`sequence_length`).
|
||||||
|
Position outside of the sequence are not taken into account for computing the loss.
|
||||||
|
|
||||||
|
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||||
|
**loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
|
||||||
|
Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
|
||||||
|
**start_scores**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length,)``
|
||||||
|
Span-start scores (before SoftMax).
|
||||||
|
**end_scores**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length,)``
|
||||||
|
Span-end scores (before SoftMax).
|
||||||
|
**hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
|
||||||
|
list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
|
||||||
|
of shape ``(batch_size, sequence_length, hidden_size)``:
|
||||||
|
Hidden-states of the model at the output of each layer plus the initial embedding outputs.
|
||||||
|
**attentions**: (`optional`, returned when ``config.output_attentions=True``)
|
||||||
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
|
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
|
||||||
|
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||||
|
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
|
||||||
|
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
|
||||||
|
start_positions = torch.tensor([1])
|
||||||
|
end_positions = torch.tensor([3])
|
||||||
|
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||||
|
loss, start_scores, end_scores = outputs[:2]
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, config):
|
||||||
|
super(TFBertForQuestionAnswering, self).__init__(config)
|
||||||
|
self.num_labels = config.num_labels
|
||||||
|
|
||||||
|
self.bert = TFBertMainLayer(config, name='bert')
|
||||||
|
self.qa_outputs = tf.keras.layers.Dense(config.num_labels, name='qa_outputs')
|
||||||
|
|
||||||
|
@tf.function
|
||||||
|
def call(self, inputs, training=False):
|
||||||
|
outputs = self.bert(inputs, training=training)
|
||||||
|
|
||||||
|
sequence_output = outputs[0]
|
||||||
|
|
||||||
|
logits = self.qa_outputs(sequence_output)
|
||||||
|
start_logits, end_logits = tf.split(logits, 2, axis=-1)
|
||||||
|
start_logits = tf.squeeze(start_logits, axis=-1)
|
||||||
|
end_logits = tf.squeeze(end_logits, axis=-1)
|
||||||
|
|
||||||
|
outputs = (start_logits, end_logits,) + outputs[2:]
|
||||||
|
|
||||||
|
return outputs # start_logits, end_logits, (hidden_states), (attentions)
|
||||||
|
|||||||
@@ -21,15 +21,18 @@ import shutil
|
|||||||
import pytest
|
import pytest
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from pytorch_transformers import (AutoConfig, BertConfig,
|
try:
|
||||||
|
from pytorch_transformers import (AutoConfig, BertConfig,
|
||||||
AutoModel, BertModel,
|
AutoModel, BertModel,
|
||||||
AutoModelWithLMHead, BertForMaskedLM,
|
AutoModelWithLMHead, BertForMaskedLM,
|
||||||
AutoModelForSequenceClassification, BertForSequenceClassification,
|
AutoModelForSequenceClassification, BertForSequenceClassification,
|
||||||
AutoModelForQuestionAnswering, BertForQuestionAnswering)
|
AutoModelForQuestionAnswering, BertForQuestionAnswering)
|
||||||
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
|
|
||||||
class AutoModelTest(unittest.TestCase):
|
class AutoModelTest(unittest.TestCase):
|
||||||
|
|||||||
@@ -20,21 +20,26 @@ import unittest
|
|||||||
import shutil
|
import shutil
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM,
|
from pytorch_transformers import is_torch_available
|
||||||
BertForNextSentencePrediction, BertForPreTraining,
|
|
||||||
BertForQuestionAnswering, BertForSequenceClassification,
|
|
||||||
BertForTokenClassification, BertForMultipleChoice)
|
|
||||||
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
|
|
||||||
|
try:
|
||||||
|
from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM,
|
||||||
|
BertForNextSentencePrediction, BertForPreTraining,
|
||||||
|
BertForQuestionAnswering, BertForSequenceClassification,
|
||||||
|
BertForTokenClassification, BertForMultipleChoice)
|
||||||
|
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
|
|
||||||
class BertModelTest(CommonTestCases.CommonModelTester):
|
class BertModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
all_model_classes = (BertModel, BertForMaskedLM, BertForNextSentencePrediction,
|
all_model_classes = (BertModel, BertForMaskedLM, BertForNextSentencePrediction,
|
||||||
BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
|
BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
|
||||||
BertForTokenClassification)
|
BertForTokenClassification) if is_torch_available() else ()
|
||||||
|
|
||||||
class BertModelTester(object):
|
class BertModelTester(object):
|
||||||
|
|
||||||
|
|||||||
@@ -25,12 +25,16 @@ import uuid
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
import logging
|
import logging
|
||||||
|
import pytest
|
||||||
|
|
||||||
import torch
|
try:
|
||||||
|
import torch
|
||||||
|
|
||||||
from pytorch_transformers import (PretrainedConfig, PreTrainedModel,
|
from pytorch_transformers import (PretrainedConfig, PreTrainedModel,
|
||||||
BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||||
GPT2LMHeadModel, GPT2Config, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
|
GPT2LMHeadModel, GPT2Config, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
|
|
||||||
def _config_zero_init(config):
|
def _config_zero_init(config):
|
||||||
|
|||||||
@@ -17,9 +17,15 @@ from __future__ import division
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
import pytest
|
||||||
|
|
||||||
from pytorch_transformers import (DistilBertConfig, DistilBertModel, DistilBertForMaskedLM,
|
from pytorch_transformers import is_torch_available
|
||||||
|
|
||||||
|
try:
|
||||||
|
from pytorch_transformers import (DistilBertConfig, DistilBertModel, DistilBertForMaskedLM,
|
||||||
DistilBertForQuestionAnswering, DistilBertForSequenceClassification)
|
DistilBertForQuestionAnswering, DistilBertForSequenceClassification)
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
@@ -28,7 +34,7 @@ from .configuration_common_test import ConfigTester
|
|||||||
class DistilBertModelTest(CommonTestCases.CommonModelTester):
|
class DistilBertModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
all_model_classes = (DistilBertModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering,
|
all_model_classes = (DistilBertModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering,
|
||||||
DistilBertForSequenceClassification)
|
DistilBertForSequenceClassification) if is_torch_available() else None
|
||||||
test_pruning = True
|
test_pruning = True
|
||||||
test_torchscript = True
|
test_torchscript = True
|
||||||
test_resize_embeddings = True
|
test_resize_embeddings = True
|
||||||
|
|||||||
@@ -20,9 +20,13 @@ import unittest
|
|||||||
import pytest
|
import pytest
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
from pytorch_transformers import is_torch_available
|
||||||
|
|
||||||
from pytorch_transformers import (GPT2Config, GPT2Model, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
|
try:
|
||||||
|
from pytorch_transformers import (GPT2Config, GPT2Model, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||||
GPT2LMHeadModel, GPT2DoubleHeadsModel)
|
GPT2LMHeadModel, GPT2DoubleHeadsModel)
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
@@ -30,7 +34,7 @@ from .configuration_common_test import ConfigTester
|
|||||||
|
|
||||||
class GPT2ModelTest(CommonTestCases.CommonModelTester):
|
class GPT2ModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel)
|
all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
|
||||||
|
|
||||||
class GPT2ModelTester(object):
|
class GPT2ModelTester(object):
|
||||||
|
|
||||||
|
|||||||
@@ -20,9 +20,13 @@ import unittest
|
|||||||
import pytest
|
import pytest
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
from pytorch_transformers import is_torch_available
|
||||||
|
|
||||||
from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
try:
|
||||||
|
from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||||
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
|
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
@@ -30,7 +34,7 @@ from .configuration_common_test import ConfigTester
|
|||||||
|
|
||||||
class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
|
class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
all_model_classes = (OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
|
all_model_classes = (OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else ()
|
||||||
|
|
||||||
class OpenAIGPTModelTester(object):
|
class OpenAIGPTModelTester(object):
|
||||||
|
|
||||||
|
|||||||
@@ -19,10 +19,15 @@ from __future__ import print_function
|
|||||||
import unittest
|
import unittest
|
||||||
import shutil
|
import shutil
|
||||||
import pytest
|
import pytest
|
||||||
import torch
|
|
||||||
|
|
||||||
from pytorch_transformers import (RobertaConfig, RobertaModel, RobertaForMaskedLM, RobertaForSequenceClassification)
|
from pytorch_transformers import is_torch_available
|
||||||
from pytorch_transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
|
|
||||||
|
try:
|
||||||
|
import torch
|
||||||
|
from pytorch_transformers import (RobertaConfig, RobertaModel, RobertaForMaskedLM, RobertaForSequenceClassification)
|
||||||
|
from pytorch_transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
@@ -30,7 +35,7 @@ from .configuration_common_test import ConfigTester
|
|||||||
|
|
||||||
class RobertaModelTest(CommonTestCases.CommonModelTester):
|
class RobertaModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
all_model_classes = (RobertaForMaskedLM, RobertaModel)
|
all_model_classes = (RobertaForMaskedLM, RobertaModel) if is_torch_available() else ()
|
||||||
|
|
||||||
class RobertaModelTester(object):
|
class RobertaModelTester(object):
|
||||||
|
|
||||||
|
|||||||
@@ -24,21 +24,27 @@ import sys
|
|||||||
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
|
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
|
|
||||||
|
from pytorch_transformers import BertConfig, is_tf_available
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
from pytorch_transformers.modeling_tf_bert import (TFBertModel, TFBertForMaskedLM,
|
||||||
from pytorch_transformers import (BertConfig)
|
TFBertForNextSentencePrediction,
|
||||||
from pytorch_transformers.modeling_tf_bert import TFBertModel, TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
TFBertForPreTraining,
|
||||||
|
TFBertForSequenceClassification,
|
||||||
|
TFBertForMultipleChoice,
|
||||||
|
TFBertForTokenClassification,
|
||||||
|
TFBertForQuestionAnswering,
|
||||||
|
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pytestmark = pytest.mark.skip("Require TensorFlow")
|
||||||
|
|
||||||
|
|
||||||
class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
|
class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
|
||||||
|
|
||||||
all_model_classes = (TFBertModel,)
|
all_model_classes = (TFBertModel, TFBertForMaskedLM, TFBertForNextSentencePrediction,
|
||||||
# BertForMaskedLM, BertForNextSentencePrediction,
|
TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification,
|
||||||
# BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
|
TFBertForTokenClassification) if is_tf_available() else ()
|
||||||
# BertForTokenClassification)
|
|
||||||
|
|
||||||
class TFBertModelTester(object):
|
class TFBertModelTester(object):
|
||||||
|
|
||||||
@@ -123,14 +129,8 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
|
|||||||
|
|
||||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
|
||||||
def check_loss_output(self, result):
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(result["loss"].size()),
|
|
||||||
[])
|
|
||||||
|
|
||||||
def create_and_check_bert_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
def create_and_check_bert_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
||||||
model = TFBertModel(config=config)
|
model = TFBertModel(config=config)
|
||||||
# model.eval()
|
|
||||||
inputs = {'input_ids': input_ids,
|
inputs = {'input_ids': input_ids,
|
||||||
'attention_mask': input_mask,
|
'attention_mask': input_mask,
|
||||||
'token_type_ids': token_type_ids}
|
'token_type_ids': token_type_ids}
|
||||||
@@ -152,125 +152,115 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
|
|||||||
|
|
||||||
|
|
||||||
def create_and_check_bert_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
def create_and_check_bert_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
||||||
pass
|
model = TFBertForMaskedLM(config=config)
|
||||||
# model = BertForMaskedLM(config=config)
|
inputs = {'input_ids': input_ids,
|
||||||
# model.eval()
|
'attention_mask': input_mask,
|
||||||
# loss, prediction_scores = model(input_ids, token_type_ids, input_mask, token_labels)
|
'token_type_ids': token_type_ids}
|
||||||
# result = {
|
prediction_scores, = model(inputs)
|
||||||
# "loss": loss,
|
result = {
|
||||||
# "prediction_scores": prediction_scores,
|
"prediction_scores": prediction_scores.numpy(),
|
||||||
# }
|
}
|
||||||
# self.parent.assertListEqual(
|
self.parent.assertListEqual(
|
||||||
# list(result["prediction_scores"].size()),
|
list(result["prediction_scores"].shape),
|
||||||
# [self.batch_size, self.seq_length, self.vocab_size])
|
[self.batch_size, self.seq_length, self.vocab_size])
|
||||||
# self.check_loss_output(result)
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_bert_for_next_sequence_prediction(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
def create_and_check_bert_for_next_sequence_prediction(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
||||||
pass
|
model = TFBertForNextSentencePrediction(config=config)
|
||||||
# model = BertForNextSentencePrediction(config=config)
|
inputs = {'input_ids': input_ids,
|
||||||
# model.eval()
|
'attention_mask': input_mask,
|
||||||
# loss, seq_relationship_score = model(input_ids, token_type_ids, input_mask, sequence_labels)
|
'token_type_ids': token_type_ids}
|
||||||
# result = {
|
seq_relationship_score, = model(inputs)
|
||||||
# "loss": loss,
|
result = {
|
||||||
# "seq_relationship_score": seq_relationship_score,
|
"seq_relationship_score": seq_relationship_score.numpy(),
|
||||||
# }
|
}
|
||||||
# self.parent.assertListEqual(
|
self.parent.assertListEqual(
|
||||||
# list(result["seq_relationship_score"].size()),
|
list(result["seq_relationship_score"].shape),
|
||||||
# [self.batch_size, 2])
|
[self.batch_size, 2])
|
||||||
# self.check_loss_output(result)
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_bert_for_pretraining(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
def create_and_check_bert_for_pretraining(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
||||||
pass
|
model = TFBertForPreTraining(config=config)
|
||||||
# model = BertForPreTraining(config=config)
|
inputs = {'input_ids': input_ids,
|
||||||
# model.eval()
|
'attention_mask': input_mask,
|
||||||
# loss, prediction_scores, seq_relationship_score = model(input_ids, token_type_ids, input_mask, token_labels, sequence_labels)
|
'token_type_ids': token_type_ids}
|
||||||
# result = {
|
prediction_scores, seq_relationship_score = model(inputs)
|
||||||
# "loss": loss,
|
result = {
|
||||||
# "prediction_scores": prediction_scores,
|
"prediction_scores": prediction_scores.numpy(),
|
||||||
# "seq_relationship_score": seq_relationship_score,
|
"seq_relationship_score": seq_relationship_score.numpy(),
|
||||||
# }
|
}
|
||||||
# self.parent.assertListEqual(
|
self.parent.assertListEqual(
|
||||||
# list(result["prediction_scores"].size()),
|
list(result["prediction_scores"].shape),
|
||||||
# [self.batch_size, self.seq_length, self.vocab_size])
|
[self.batch_size, self.seq_length, self.vocab_size])
|
||||||
# self.parent.assertListEqual(
|
self.parent.assertListEqual(
|
||||||
# list(result["seq_relationship_score"].size()),
|
list(result["seq_relationship_score"].shape),
|
||||||
# [self.batch_size, 2])
|
[self.batch_size, 2])
|
||||||
# self.check_loss_output(result)
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_bert_for_question_answering(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
|
||||||
pass
|
|
||||||
# model = BertForQuestionAnswering(config=config)
|
|
||||||
# model.eval()
|
|
||||||
# loss, start_logits, end_logits = model(input_ids, token_type_ids, input_mask, sequence_labels, sequence_labels)
|
|
||||||
# result = {
|
|
||||||
# "loss": loss,
|
|
||||||
# "start_logits": start_logits,
|
|
||||||
# "end_logits": end_logits,
|
|
||||||
# }
|
|
||||||
# self.parent.assertListEqual(
|
|
||||||
# list(result["start_logits"].size()),
|
|
||||||
# [self.batch_size, self.seq_length])
|
|
||||||
# self.parent.assertListEqual(
|
|
||||||
# list(result["end_logits"].size()),
|
|
||||||
# [self.batch_size, self.seq_length])
|
|
||||||
# self.check_loss_output(result)
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_bert_for_sequence_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
def create_and_check_bert_for_sequence_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
||||||
pass
|
config.num_labels = self.num_labels
|
||||||
# config.num_labels = self.num_labels
|
model = TFBertForSequenceClassification(config=config)
|
||||||
# model = BertForSequenceClassification(config)
|
inputs = {'input_ids': input_ids,
|
||||||
# model.eval()
|
'attention_mask': input_mask,
|
||||||
# loss, logits = model(input_ids, token_type_ids, input_mask, sequence_labels)
|
'token_type_ids': token_type_ids}
|
||||||
# result = {
|
logits, = model(inputs)
|
||||||
# "loss": loss,
|
result = {
|
||||||
# "logits": logits,
|
"logits": logits.numpy(),
|
||||||
# }
|
}
|
||||||
# self.parent.assertListEqual(
|
self.parent.assertListEqual(
|
||||||
# list(result["logits"].size()),
|
list(result["logits"].shape),
|
||||||
# [self.batch_size, self.num_labels])
|
[self.batch_size, self.num_labels])
|
||||||
# self.check_loss_output(result)
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_bert_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
|
||||||
pass
|
|
||||||
# config.num_labels = self.num_labels
|
|
||||||
# model = BertForTokenClassification(config=config)
|
|
||||||
# model.eval()
|
|
||||||
# loss, logits = model(input_ids, token_type_ids, input_mask, token_labels)
|
|
||||||
# result = {
|
|
||||||
# "loss": loss,
|
|
||||||
# "logits": logits,
|
|
||||||
# }
|
|
||||||
# self.parent.assertListEqual(
|
|
||||||
# list(result["logits"].size()),
|
|
||||||
# [self.batch_size, self.seq_length, self.num_labels])
|
|
||||||
# self.check_loss_output(result)
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_bert_for_multiple_choice(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
def create_and_check_bert_for_multiple_choice(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
||||||
pass
|
config.num_choices = self.num_choices
|
||||||
# config.num_choices = self.num_choices
|
model = TFBertForMultipleChoice(config=config)
|
||||||
# model = BertForMultipleChoice(config=config)
|
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
|
||||||
# model.eval()
|
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
|
||||||
# multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
|
||||||
# multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
inputs = {'input_ids': multiple_choice_inputs_ids,
|
||||||
# multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
|
'attention_mask': multiple_choice_input_mask,
|
||||||
# loss, logits = model(multiple_choice_inputs_ids,
|
'token_type_ids': multiple_choice_token_type_ids}
|
||||||
# multiple_choice_token_type_ids,
|
logits, = model(inputs)
|
||||||
# multiple_choice_input_mask,
|
result = {
|
||||||
# choice_labels)
|
"logits": logits.numpy(),
|
||||||
# result = {
|
}
|
||||||
# "loss": loss,
|
self.parent.assertListEqual(
|
||||||
# "logits": logits,
|
list(result["logits"].shape),
|
||||||
# }
|
[self.batch_size, self.num_choices])
|
||||||
# self.parent.assertListEqual(
|
|
||||||
# list(result["logits"].size()),
|
|
||||||
# [self.batch_size, self.num_choices])
|
def create_and_check_bert_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
||||||
# self.check_loss_output(result)
|
config.num_labels = self.num_labels
|
||||||
|
model = TFBertForTokenClassification(config=config)
|
||||||
|
inputs = {'input_ids': input_ids,
|
||||||
|
'attention_mask': input_mask,
|
||||||
|
'token_type_ids': token_type_ids}
|
||||||
|
logits, = model(inputs)
|
||||||
|
result = {
|
||||||
|
"logits": logits.numpy(),
|
||||||
|
}
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(result["logits"].shape),
|
||||||
|
[self.batch_size, self.seq_length, self.num_labels])
|
||||||
|
|
||||||
|
|
||||||
|
def create_and_check_bert_for_question_answering(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
||||||
|
model = TFBertForQuestionAnswering(config=config)
|
||||||
|
inputs = {'input_ids': input_ids,
|
||||||
|
'attention_mask': input_mask,
|
||||||
|
'token_type_ids': token_type_ids}
|
||||||
|
start_logits, end_logits = model(inputs)
|
||||||
|
result = {
|
||||||
|
"start_logits": start_logits.numpy(),
|
||||||
|
"end_logits": end_logits.numpy(),
|
||||||
|
}
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(result["start_logits"].shape),
|
||||||
|
[self.batch_size, self.seq_length])
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(result["end_logits"].shape),
|
||||||
|
[self.batch_size, self.seq_length])
|
||||||
|
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_common(self):
|
def prepare_config_and_inputs_for_common(self):
|
||||||
@@ -287,48 +277,39 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_bert_model(self):
|
def test_bert_model(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_bert_model(*config_and_inputs)
|
self.model_tester.create_and_check_bert_model(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_for_masked_lm(self):
|
def test_for_masked_lm(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
|
self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_for_multiple_choice(self):
|
def test_for_multiple_choice(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
|
self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_for_next_sequence_prediction(self):
|
def test_for_next_sequence_prediction(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
|
self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_for_pretraining(self):
|
def test_for_pretraining(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_bert_for_pretraining(*config_and_inputs)
|
self.model_tester.create_and_check_bert_for_pretraining(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_for_question_answering(self):
|
def test_for_question_answering(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_bert_for_question_answering(*config_and_inputs)
|
self.model_tester.create_and_check_bert_for_question_answering(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_for_sequence_classification(self):
|
def test_for_sequence_classification(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
|
self.model_tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_for_token_classification(self):
|
def test_for_token_classification(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)
|
self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_model_from_pretrained(self):
|
def test_model_from_pretrained(self):
|
||||||
cache_dir = "/tmp/pytorch_transformers_test/"
|
cache_dir = "/tmp/pytorch_transformers_test/"
|
||||||
for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ try:
|
|||||||
from pytorch_transformers import TFPreTrainedModel
|
from pytorch_transformers import TFPreTrainedModel
|
||||||
# from pytorch_transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
# from pytorch_transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pytestmark = pytest.mark.skip("Require TensorFlow")
|
||||||
|
|
||||||
|
|
||||||
def _config_zero_init(config):
|
def _config_zero_init(config):
|
||||||
@@ -50,7 +50,6 @@ class TFCommonTestCases:
|
|||||||
test_pruning = True
|
test_pruning = True
|
||||||
test_resize_embeddings = True
|
test_resize_embeddings = True
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_initialization(self):
|
def test_initialization(self):
|
||||||
pass
|
pass
|
||||||
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
@@ -64,7 +63,6 @@ class TFCommonTestCases:
|
|||||||
# msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
|
# msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_attention_outputs(self):
|
def test_attention_outputs(self):
|
||||||
pass
|
pass
|
||||||
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
@@ -105,7 +103,6 @@ class TFCommonTestCases:
|
|||||||
# self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
|
# self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_headmasking(self):
|
def test_headmasking(self):
|
||||||
pass
|
pass
|
||||||
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
@@ -153,7 +150,6 @@ class TFCommonTestCases:
|
|||||||
# attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
|
# attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_head_pruning(self):
|
def test_head_pruning(self):
|
||||||
pass
|
pass
|
||||||
# if not self.test_pruning:
|
# if not self.test_pruning:
|
||||||
@@ -181,7 +177,6 @@ class TFCommonTestCases:
|
|||||||
# attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
|
# attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_hidden_states_output(self):
|
def test_hidden_states_output(self):
|
||||||
pass
|
pass
|
||||||
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
@@ -201,7 +196,6 @@ class TFCommonTestCases:
|
|||||||
# [self.model_tester.seq_length, self.model_tester.hidden_size])
|
# [self.model_tester.seq_length, self.model_tester.hidden_size])
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_resize_tokens_embeddings(self):
|
def test_resize_tokens_embeddings(self):
|
||||||
pass
|
pass
|
||||||
# original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
# original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
@@ -238,7 +232,6 @@ class TFCommonTestCases:
|
|||||||
# self.assertTrue(models_equal)
|
# self.assertTrue(models_equal)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif('tensorflow' not in sys.modules, reason="requires TensorFlow")
|
|
||||||
def test_tie_model_weights(self):
|
def test_tie_model_weights(self):
|
||||||
pass
|
pass
|
||||||
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|||||||
@@ -21,17 +21,21 @@ import random
|
|||||||
import shutil
|
import shutil
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import torch
|
from pytorch_transformers import is_torch_available
|
||||||
|
|
||||||
from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
|
try:
|
||||||
from pytorch_transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
|
import torch
|
||||||
|
from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
|
||||||
|
from pytorch_transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
|
|
||||||
class TransfoXLModelTest(CommonTestCases.CommonModelTester):
|
class TransfoXLModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel)
|
all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel) if is_torch_available() else ()
|
||||||
test_pruning = False
|
test_pruning = False
|
||||||
test_torchscript = False
|
test_torchscript = False
|
||||||
test_resize_embeddings = False
|
test_resize_embeddings = False
|
||||||
|
|||||||
@@ -20,8 +20,14 @@ import unittest
|
|||||||
import shutil
|
import shutil
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering, XLMForSequenceClassification)
|
from pytorch_transformers import is_torch_available
|
||||||
from pytorch_transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP
|
|
||||||
|
try:
|
||||||
|
from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering,
|
||||||
|
XLMForSequenceClassification)
|
||||||
|
from pytorch_transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
@@ -29,9 +35,9 @@ from .configuration_common_test import ConfigTester
|
|||||||
|
|
||||||
class XLMModelTest(CommonTestCases.CommonModelTester):
|
class XLMModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
all_model_classes = (XLMModel, XLMWithLMHeadModel,
|
all_model_classes = (XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering,
|
||||||
XLMForQuestionAnswering, XLMForSequenceClassification)
|
XLMForSequenceClassification) if is_torch_available() else ()
|
||||||
# , XLMForSequenceClassification, XLMForTokenClassification),
|
|
||||||
|
|
||||||
class XLMModelTester(object):
|
class XLMModelTester(object):
|
||||||
|
|
||||||
|
|||||||
@@ -23,10 +23,15 @@ import random
|
|||||||
import shutil
|
import shutil
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import torch
|
from pytorch_transformers import is_torch_available
|
||||||
|
|
||||||
from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering)
|
try:
|
||||||
from pytorch_transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
|
import torch
|
||||||
|
|
||||||
|
from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering)
|
||||||
|
from pytorch_transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ids_tensor)
|
||||||
from .configuration_common_test import ConfigTester
|
from .configuration_common_test import ConfigTester
|
||||||
@@ -34,7 +39,7 @@ from .configuration_common_test import ConfigTester
|
|||||||
class XLNetModelTest(CommonTestCases.CommonModelTester):
|
class XLNetModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
all_model_classes=(XLNetModel, XLNetLMHeadModel,
|
all_model_classes=(XLNetModel, XLNetLMHeadModel,
|
||||||
XLNetForSequenceClassification, XLNetForQuestionAnswering)
|
XLNetForSequenceClassification, XLNetForQuestionAnswering) if is_torch_available() else ()
|
||||||
test_pruning = False
|
test_pruning = False
|
||||||
|
|
||||||
class XLNetModelTester(object):
|
class XLNetModelTester(object):
|
||||||
|
|||||||
@@ -18,11 +18,17 @@ from __future__ import print_function
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
import os
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
import torch
|
from pytorch_transformers import is_torch_available
|
||||||
|
|
||||||
from pytorch_transformers import (AdamW, ConstantLRSchedule, WarmupConstantSchedule,
|
try:
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from pytorch_transformers import (AdamW, ConstantLRSchedule, WarmupConstantSchedule,
|
||||||
WarmupCosineSchedule, WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
|
WarmupCosineSchedule, WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch")
|
||||||
|
|
||||||
from .tokenization_tests_commons import TemporaryDirectory
|
from .tokenization_tests_commons import TemporaryDirectory
|
||||||
|
|
||||||
@@ -71,8 +77,8 @@ class OptimizationTest(unittest.TestCase):
|
|||||||
|
|
||||||
|
|
||||||
class ScheduleInitTest(unittest.TestCase):
|
class ScheduleInitTest(unittest.TestCase):
|
||||||
m = torch.nn.Linear(50, 50)
|
m = torch.nn.Linear(50, 50) if is_torch_available() else None
|
||||||
optimizer = AdamW(m.parameters(), lr=10.)
|
optimizer = AdamW(m.parameters(), lr=10.) if is_torch_available() else None
|
||||||
num_steps = 10
|
num_steps = 10
|
||||||
|
|
||||||
def assertListAlmostEqual(self, list1, list2, tol):
|
def assertListAlmostEqual(self, list1, list2, tol):
|
||||||
|
|||||||
@@ -22,20 +22,19 @@ import pytest
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from pytorch_transformers import AutoTokenizer, BertTokenizer, AutoTokenizer, GPT2Tokenizer
|
from pytorch_transformers import AutoTokenizer, BertTokenizer, AutoTokenizer, GPT2Tokenizer
|
||||||
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
from pytorch_transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
|
||||||
from pytorch_transformers.modeling_gpt2 import GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
|
|
||||||
|
|
||||||
|
|
||||||
class AutoTokenizerTest(unittest.TestCase):
|
class AutoTokenizerTest(unittest.TestCase):
|
||||||
def test_tokenizer_from_pretrained(self):
|
def test_tokenizer_from_pretrained(self):
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
for model_name in list(BERT_PRETRAINED_CONFIG_ARCHIVE_MAP.keys())[:1]:
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||||
self.assertIsNotNone(tokenizer)
|
self.assertIsNotNone(tokenizer)
|
||||||
self.assertIsInstance(tokenizer, BertTokenizer)
|
self.assertIsInstance(tokenizer, BertTokenizer)
|
||||||
self.assertGreater(len(tokenizer), 0)
|
self.assertGreater(len(tokenizer), 0)
|
||||||
|
|
||||||
for model_name in list(GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
for model_name in list(GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP.keys())[:1]:
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||||
self.assertIsNotNone(tokenizer)
|
self.assertIsNotNone(tokenizer)
|
||||||
self.assertIsInstance(tokenizer, GPT2Tokenizer)
|
self.assertIsInstance(tokenizer, GPT2Tokenizer)
|
||||||
|
|||||||
@@ -16,15 +16,21 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import unittest
|
import unittest
|
||||||
|
import pytest
|
||||||
from io import open
|
from io import open
|
||||||
|
|
||||||
from pytorch_transformers.tokenization_transfo_xl import TransfoXLTokenizer, VOCAB_FILES_NAMES
|
from pytorch_transformers import is_torch_available
|
||||||
|
|
||||||
from.tokenization_tests_commons import CommonTestCases
|
try:
|
||||||
|
from pytorch_transformers.tokenization_transfo_xl import TransfoXLTokenizer, VOCAB_FILES_NAMES
|
||||||
|
except ImportError:
|
||||||
|
pytestmark = pytest.mark.skip("Require Torch") # TODO: untangle Transfo-XL tokenizer from torch.load and torch.save
|
||||||
|
|
||||||
|
from .tokenization_tests_commons import CommonTestCases
|
||||||
|
|
||||||
class TransfoXLTokenizationTest(CommonTestCases.CommonTokenizerTester):
|
class TransfoXLTokenizationTest(CommonTestCases.CommonTokenizerTester):
|
||||||
|
|
||||||
tokenizer_class = TransfoXLTokenizer
|
tokenizer_class = TransfoXLTokenizer if is_torch_available() else None
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
super(TransfoXLTokenizationTest, self).setUp()
|
super(TransfoXLTokenizationTest, self).setUp()
|
||||||
|
|||||||
@@ -26,16 +26,20 @@ import sys
|
|||||||
from collections import Counter, OrderedDict
|
from collections import Counter, OrderedDict
|
||||||
from io import open
|
from io import open
|
||||||
|
|
||||||
import torch
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from .file_utils import cached_path
|
from .file_utils import cached_path
|
||||||
from .tokenization_utils import PreTrainedTokenizer
|
from .tokenization_utils import PreTrainedTokenizer
|
||||||
|
|
||||||
if sys.version_info[0] == 2:
|
try:
|
||||||
import cPickle as pickle
|
import torch
|
||||||
else:
|
except ImportError:
|
||||||
import pickle
|
pass
|
||||||
|
|
||||||
|
# if sys.version_info[0] == 2:
|
||||||
|
# import cPickle as pickle
|
||||||
|
# else:
|
||||||
|
# import pickle
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
Reference in New Issue
Block a user