Add BlenderBot small tokenizer to the init (#13367)
* Add BlenderBot small tokenizer to the init
* Update src/transformers/__init__.py

Co-authored-by: Suraj Patil <surajp815@gmail.com>

* Style
* Bugfix

Co-authored-by: Suraj Patil <surajp815@gmail.com>
This commit is contained in:
@@ -354,7 +354,7 @@ Flax), PyTorch, and/or TensorFlow.
|
|||||||
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
|
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
|
||||||
| Blenderbot | ✅ | ❌ | ✅ | ✅ | ❌ |
|
| Blenderbot | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||||
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
|
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
|
||||||
| BlenderbotSmall | ✅ | ❌ | ✅ | ✅ | ❌ |
|
| BlenderbotSmall | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||||
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
|
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
|
||||||
| CamemBERT | ✅ | ✅ | ✅ | ✅ | ❌ |
|
| CamemBERT | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||||
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
|
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
|
||||||
|
|||||||
@@ -57,6 +57,13 @@ BlenderbotSmallTokenizer
|
|||||||
create_token_type_ids_from_sequences, save_vocabulary
|
create_token_type_ids_from_sequences, save_vocabulary
|
||||||
|
|
||||||
|
|
||||||
|
BlenderbotSmallTokenizerFast
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. autoclass:: transformers.BlenderbotSmallTokenizerFast
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
||||||
BlenderbotSmallModel
|
BlenderbotSmallModel
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
|||||||
@@ -370,6 +370,7 @@ if is_tokenizers_available():
|
|||||||
_import_structure["models.roformer"].append("RoFormerTokenizerFast")
|
_import_structure["models.roformer"].append("RoFormerTokenizerFast")
|
||||||
_import_structure["models.clip"].append("CLIPTokenizerFast")
|
_import_structure["models.clip"].append("CLIPTokenizerFast")
|
||||||
_import_structure["models.convbert"].append("ConvBertTokenizerFast")
|
_import_structure["models.convbert"].append("ConvBertTokenizerFast")
|
||||||
|
_import_structure["models.blenderbot_small"].append("BlenderbotSmallTokenizerFast")
|
||||||
_import_structure["models.albert"].append("AlbertTokenizerFast")
|
_import_structure["models.albert"].append("AlbertTokenizerFast")
|
||||||
_import_structure["models.bart"].append("BartTokenizerFast")
|
_import_structure["models.bart"].append("BartTokenizerFast")
|
||||||
_import_structure["models.barthez"].append("BarthezTokenizerFast")
|
_import_structure["models.barthez"].append("BarthezTokenizerFast")
|
||||||
@@ -2182,6 +2183,7 @@ if TYPE_CHECKING:
|
|||||||
from .models.barthez import BarthezTokenizerFast
|
from .models.barthez import BarthezTokenizerFast
|
||||||
from .models.bert import BertTokenizerFast
|
from .models.bert import BertTokenizerFast
|
||||||
from .models.big_bird import BigBirdTokenizerFast
|
from .models.big_bird import BigBirdTokenizerFast
|
||||||
|
from .models.blenderbot_small import BlenderbotSmallTokenizerFast
|
||||||
from .models.camembert import CamembertTokenizerFast
|
from .models.camembert import CamembertTokenizerFast
|
||||||
from .models.clip import CLIPTokenizerFast
|
from .models.clip import CLIPTokenizerFast
|
||||||
from .models.convbert import ConvBertTokenizerFast
|
from .models.convbert import ConvBertTokenizerFast
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from ...file_utils import _LazyModule, is_tf_available, is_torch_available
|
from ...file_utils import _LazyModule, is_tf_available, is_tokenizers_available, is_torch_available
|
||||||
|
|
||||||
|
|
||||||
_import_structure = {
|
_import_structure = {
|
||||||
@@ -25,6 +25,9 @@ _import_structure = {
|
|||||||
"tokenization_blenderbot_small": ["BlenderbotSmallTokenizer"],
|
"tokenization_blenderbot_small": ["BlenderbotSmallTokenizer"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if is_tokenizers_available():
|
||||||
|
_import_structure["tokenization_blenderbot_small_fast"] = ["BlenderbotSmallTokenizerFast"]
|
||||||
|
|
||||||
if is_torch_available():
|
if is_torch_available():
|
||||||
_import_structure["modeling_blenderbot_small"] = [
|
_import_structure["modeling_blenderbot_small"] = [
|
||||||
"BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST",
|
"BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||||
@@ -45,6 +48,9 @@ if TYPE_CHECKING:
|
|||||||
from .configuration_blenderbot_small import BLENDERBOT_SMALL_PRETRAINED_CONFIG_ARCHIVE_MAP, BlenderbotSmallConfig
|
from .configuration_blenderbot_small import BLENDERBOT_SMALL_PRETRAINED_CONFIG_ARCHIVE_MAP, BlenderbotSmallConfig
|
||||||
from .tokenization_blenderbot_small import BlenderbotSmallTokenizer
|
from .tokenization_blenderbot_small import BlenderbotSmallTokenizer
|
||||||
|
|
||||||
|
if is_tokenizers_available():
|
||||||
|
from .tokenization_blenderbot_small_fast import BlenderbotSmallTokenizerFast
|
||||||
|
|
||||||
if is_torch_available():
|
if is_torch_available():
|
||||||
from .modeling_blenderbot_small import (
|
from .modeling_blenderbot_small import (
|
||||||
BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST,
|
BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
|
|||||||
@@ -74,8 +74,8 @@ class BlenderbotSmallTokenizerFast(PreTrainedTokenizerFast):
|
|||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
ByteLevelBPETokenizer(
|
ByteLevelBPETokenizer(
|
||||||
vocab_file=vocab_file,
|
vocab=vocab_file,
|
||||||
merges_file=merges_file,
|
merges=merges_file,
|
||||||
add_prefix_space=add_prefix_space,
|
add_prefix_space=add_prefix_space,
|
||||||
trim_offsets=trim_offsets,
|
trim_offsets=trim_offsets,
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -47,6 +47,15 @@ class BigBirdTokenizerFast:
|
|||||||
requires_backends(cls, ["tokenizers"])
|
requires_backends(cls, ["tokenizers"])
|
||||||
|
|
||||||
|
|
||||||
|
class BlenderbotSmallTokenizerFast:
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
requires_backends(self, ["tokenizers"])
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_pretrained(cls, *args, **kwargs):
|
||||||
|
requires_backends(cls, ["tokenizers"])
|
||||||
|
|
||||||
|
|
||||||
class CamembertTokenizerFast:
|
class CamembertTokenizerFast:
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
requires_backends(self, ["tokenizers"])
|
requires_backends(self, ["tokenizers"])
|
||||||
|
|||||||
Reference in New Issue
Block a user