Updating the TensorFlow models to work as expected with tokenizers v3.0.0 (#3684)

* Updating modeling tf files; adding tests

* Merge `encode_plus` and `batch_encode_plus`
This commit is contained in:
Lysandre Debut
2020-04-08 16:22:44 -04:00
committed by GitHub
parent 500aa12318
commit 6435b9f908
14 changed files with 129 additions and 12 deletions

View File

@@ -24,6 +24,7 @@ import tensorflow as tf
from .configuration_bert import BertConfig
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
from .modeling_tf_utils import TFPreTrainedModel, get_initializer, keras_serializable, shape_list
from .tokenization_utils import BatchEncoding
logger = logging.getLogger(__name__)
@@ -514,7 +515,7 @@ class TFBertMainLayer(tf.keras.layers.Layer):
head_mask = inputs[4] if len(inputs) > 4 else head_mask
inputs_embeds = inputs[5] if len(inputs) > 5 else inputs_embeds
assert len(inputs) <= 6, "Too many inputs."
elif isinstance(inputs, dict):
elif isinstance(inputs, (dict, BatchEncoding)):
input_ids = inputs.get("input_ids")
attention_mask = inputs.get("attention_mask", attention_mask)
token_type_ids = inputs.get("token_type_ids", token_type_ids)