Clean up autoclass doc (#7081)
This commit is contained in:
@@ -1,109 +1,131 @@
|
|||||||
AutoModels
|
AutoClasses
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
In many cases, the architecture you want to use can be guessed from the name or the path of the pretrained model you
|
In many cases, the architecture you want to use can be guessed from the name or the path of the pretrained model you
|
||||||
are supplying to the ``from_pretrained`` method.
|
are supplying to the :obj:`from_pretrained()` method.
|
||||||
|
|
||||||
AutoClasses are here to do this job for you so that you automatically retrieve the relevant model given the name/path
|
AutoClasses are here to do this job for you so that you automatically retrieve the relevant model given the name/path
|
||||||
to the pretrained weights/config/vocabulary:
|
to the pretrained weights/config/vocabulary.
|
||||||
|
|
||||||
Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will directly create a class of the relevant
|
Instantiating one of :class:`~transformers.AutoConfig`, :class:`~transformers.AutoModel`, and
|
||||||
architecture (ex: ``model = AutoModel.from_pretrained('bert-base-cased')`` will create a instance of
|
:class:`~transformers.AutoTokenizer` will directly create a class of the relevant architecture. For instance
|
||||||
:class:`~transformers.BertModel`).
|
|
||||||
|
|
||||||
|
|
||||||
``AutoConfig``
|
.. code-block:: python
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
model = AutoModel.from_pretrained('bert-base-cased')
|
||||||
|
|
||||||
|
will create a model that is an instance of :class:`~transformers.BertModel`.
|
||||||
|
|
||||||
|
There is one class of :obj:`AutoModel` for each task, and for each backend (PyTorch or TensorFlow).
|
||||||
|
|
||||||
|
|
||||||
|
AutoConfig
|
||||||
|
~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.AutoConfig
|
.. autoclass:: transformers.AutoConfig
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``AutoTokenizer``
|
AutoTokenizer
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.AutoTokenizer
|
.. autoclass:: transformers.AutoTokenizer
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``AutoModel``
|
AutoModel
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.AutoModel
|
.. autoclass:: transformers.AutoModel
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``AutoModelForPreTraining``
|
AutoModelForPreTraining
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.AutoModelForPreTraining
|
.. autoclass:: transformers.AutoModelForPreTraining
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``AutoModelWithLMHead``
|
AutoModelWithLMHead
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.AutoModelWithLMHead
|
.. autoclass:: transformers.AutoModelWithLMHead
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``AutoModelForSequenceClassification``
|
AutoModelForSequenceClassification
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.AutoModelForSequenceClassification
|
.. autoclass:: transformers.AutoModelForSequenceClassification
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``AutoModelForQuestionAnswering``
|
AutoModelForMultipleChoice
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. autoclass:: transformers.AutoModelForMultipleChoice
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
||||||
|
AutoModelForTokenClassification
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. autoclass:: transformers.AutoModelForTokenClassification
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
||||||
|
AutoModelForQuestionAnswering
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.AutoModelForQuestionAnswering
|
.. autoclass:: transformers.AutoModelForQuestionAnswering
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``AutoModelForTokenClassification``
|
TFAutoModel
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.AutoModelForTokenClassification
|
|
||||||
:members:
|
|
||||||
|
|
||||||
``TFAutoModel``
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
.. autoclass:: transformers.TFAutoModel
|
.. autoclass:: transformers.TFAutoModel
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``TFAutoModelForPreTraining``
|
TFAutoModelForPreTraining
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.TFAutoModelForPreTraining
|
.. autoclass:: transformers.TFAutoModelForPreTraining
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``TFAutoModelWithLMHead``
|
TFAutoModelWithLMHead
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.TFAutoModelWithLMHead
|
.. autoclass:: transformers.TFAutoModelWithLMHead
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``TFAutoModelForSequenceClassification``
|
TFAutoModelForSequenceClassification
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.TFAutoModelForSequenceClassification
|
.. autoclass:: transformers.TFAutoModelForSequenceClassification
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``TFAutoModelForQuestionAnswering``
|
TFAutoModelForMultipleChoice
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.TFAutoModelForQuestionAnswering
|
.. autoclass:: transformers.TFAutoModelForMultipleChoice
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``TFAutoModelForTokenClassification``
|
TFAutoModelForTokenClassification
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.TFAutoModelForTokenClassification
|
.. autoclass:: transformers.TFAutoModelForTokenClassification
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
|
TFAutoModelForQuestionAnswering
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. autoclass:: transformers.TFAutoModelForQuestionAnswering
|
||||||
|
:members:
|
||||||
|
|||||||
@@ -197,9 +197,7 @@ class AutoConfig:
|
|||||||
This is a generic configuration class that will be instantiated as one of the configuration classes of the library
|
This is a generic configuration class that will be instantiated as one of the configuration classes of the library
|
||||||
when created with the :meth:`~transformers.AutoConfig.from_pretrained` class method.
|
when created with the :meth:`~transformers.AutoConfig.from_pretrained` class method.
|
||||||
|
|
||||||
This method takes care of returning the correct model class instance
|
This class cannot be instantiated directly using ``__init__()`` (throws an error).
|
||||||
based on the `model_type` property of the config object, or when it's missing,
|
|
||||||
falling back to using pattern matching on the `pretrained_model_name_or_path` string.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -222,58 +220,77 @@ class AutoConfig:
|
|||||||
@classmethod
|
@classmethod
|
||||||
@replace_list_option_in_docstrings()
|
@replace_list_option_in_docstrings()
|
||||||
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
|
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
|
||||||
r""" Instantiates one of the configuration classes of the library
|
r"""
|
||||||
from a pre-trained model configuration.
|
Instantiate one of the configuration classes of the library from a pretrained model configuration.
|
||||||
|
|
||||||
The configuration class to instantiate is selected
|
The configuration class to instantiate is selected based on the :obj:`model_type` property of the config
|
||||||
based on the `model_type` property of the config object, or when it's missing,
|
object that is loaded, or when it's missing, by falling back to using pattern matching on
|
||||||
falling back to using pattern matching on the `pretrained_model_name_or_path` string:
|
:obj:`pretrained_model_name_or_path`:
|
||||||
|
|
||||||
List options
|
List options
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
pretrained_model_name_or_path (:obj:`string`):
|
pretrained_model_name_or_path (:obj:`str`):
|
||||||
Is either: \
|
Can be either:
|
||||||
- a string with the `shortcut name` of a pre-trained model configuration to load from cache or download, e.g.: ``bert-base-uncased``.
|
|
||||||
- a string with the `identifier name` of a pre-trained model configuration that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
|
|
||||||
- a path to a `directory` containing a configuration file saved using the :func:`~transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
|
|
||||||
- a path or url to a saved configuration JSON `file`, e.g.: ``./my_model_directory/configuration.json``.
|
|
||||||
|
|
||||||
cache_dir (:obj:`string`, optional, defaults to `None`):
|
- A string with the `shortcut name` of a pretrained model configuration to load from cache or
|
||||||
Path to a directory in which a downloaded pre-trained model
|
download, e.g., ``bert-base-uncased``.
|
||||||
configuration should be cached if the standard cache should not be used.
|
- A string with the `identifier name` of a pretrained model configuration that was user-uploaded to
|
||||||
|
our S3, e.g., ``dbmdz/bert-base-german-cased``.
|
||||||
force_download (:obj:`boolean`, optional, defaults to `False`):
|
- A path to a `directory` containing a configuration file saved using the
|
||||||
Force to (re-)download the model weights and configuration files and override the cached versions if they exist.
|
:meth:`~transformers.PretrainedConfig.save_pretrained` method, or the
|
||||||
|
:meth:`~transformers.PreTrainedModel.save_pretrained` method, e.g., ``./my_model_directory/``.
|
||||||
resume_download (:obj:`boolean`, optional, defaults to `False`):
|
- A path or url to a saved configuration JSON `file`, e.g.,
|
||||||
Do not delete incompletely received file. Attempt to resume the download if such a file exists.
|
``./my_model_directory/configuration.json``.
|
||||||
|
cache_dir (:obj:`str`, `optional`):
|
||||||
proxies (:obj:`Dict[str, str]`, optional, defaults to `None`):
|
Path to a directory in which a downloaded pretrained model configuration should be cached if the
|
||||||
A dictionary of proxy servers to use by protocol or endpoint, e.g.: :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`.
|
standard cache should not be used.
|
||||||
The proxies are used on each request. See `the requests documentation <https://requests.readthedocs.io/en/master/user/advanced/#proxies>`__ for usage.
|
force_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||||
|
Whether or not to force the (re-)download of the model weights and configuration files and override the
|
||||||
return_unused_kwargs (:obj:`boolean`, optional, defaults to `False`):
|
cached versions if they exist.
|
||||||
- If False, then this function returns just the final configuration object.
|
resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||||
- If True, then this functions returns a tuple `(config, unused_kwargs)` where `unused_kwargs` is a dictionary consisting of the key/value pairs whose keys are not configuration attributes: ie the part of kwargs which has not been used to update `config` and is otherwise ignored.
|
Whether or not to delete incompletely received files. Will attempt to resume the download if such a
|
||||||
|
file exists.
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): key/value pairs with which to update the configuration object after loading.
|
proxies (:obj:`Dict[str, str]`, `optional`):
|
||||||
- The values in kwargs of any keys which are configuration attributes will be used to override the loaded values.
|
A dictionary of proxy servers to use by protocol or endpoint, e.g.,
|
||||||
- Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled by the `return_unused_kwargs` keyword parameter.
|
:obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
|
||||||
|
request.
|
||||||
|
return_unused_kwargs (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||||
|
If :obj:`False`, then this function returns just the final configuration object.
|
||||||
|
|
||||||
|
If :obj:`True`, then this function returns a :obj:`Tuple(config, unused_kwargs)` where `unused_kwargs`
|
||||||
|
is a dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e.,
|
||||||
|
the part of ``kwargs`` which has not been used to update ``config`` and is otherwise ignored.
|
||||||
|
kwargs (additional keyword arguments, `optional`):
|
||||||
|
The values in kwargs of any keys which are configuration attributes will be used to override the loaded
|
||||||
|
values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is
|
||||||
|
controlled by the ``return_unused_kwargs`` keyword parameter.
|
||||||
|
|
||||||
Examples::
|
Examples::
|
||||||
|
|
||||||
config = AutoConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache.
|
>>> from transformers import AutoConfig
|
||||||
config = AutoConfig.from_pretrained('./test/bert_saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
|
|
||||||
config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json')
|
|
||||||
config = AutoConfig.from_pretrained('bert-base-uncased', output_attentions=True, foo=False)
|
|
||||||
assert config.output_attentions == True
|
|
||||||
config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attentions=True,
|
|
||||||
foo=False, return_unused_kwargs=True)
|
|
||||||
assert config.output_attentions == True
|
|
||||||
assert unused_kwargs == {'foo': False}
|
|
||||||
|
|
||||||
|
>>> # Download configuration from S3 and cache.
|
||||||
|
>>> config = AutoConfig.from_pretrained('bert-base-uncased')
|
||||||
|
|
||||||
|
>>> # Download configuration from S3 (user-uploaded) and cache.
|
||||||
|
>>> config = AutoConfig.from_pretrained('dbmdz/bert-base-german-cased')
|
||||||
|
|
||||||
|
>>> # If configuration file is in a directory (e.g., was saved using `save_pretrained('./test/saved_model/')`).
|
||||||
|
>>> config = AutoConfig.from_pretrained('./test/bert_saved_model/')
|
||||||
|
|
||||||
|
>>> # Load a specific configuration file.
|
||||||
|
>>> config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json')
|
||||||
|
|
||||||
|
>>> # Change some config attributes when loading a pretrained config.
|
||||||
|
>>> config = AutoConfig.from_pretrained('bert-base-uncased', output_attentions=True, foo=False)
|
||||||
|
>>> config.output_attentions
|
||||||
|
True
|
||||||
|
>>> config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attentions=True, foo=False, return_unused_kwargs=True)
|
||||||
|
>>> config.output_attentions
|
||||||
|
True
|
||||||
|
>>> unused_kwargs
|
||||||
|
{'foo': False}
|
||||||
"""
|
"""
|
||||||
config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
|
config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -117,12 +117,11 @@ SLOW_TOKENIZER_MAPPING = {k: v[0] for k, v in TOKENIZER_MAPPING.items()}
|
|||||||
|
|
||||||
|
|
||||||
class AutoTokenizer:
|
class AutoTokenizer:
|
||||||
r""":class:`~transformers.AutoTokenizer` is a generic tokenizer class
|
r"""
|
||||||
that will be instantiated as one of the tokenizer classes of the library
|
This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library
|
||||||
when created with the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)`
|
when created with the :meth:`AutoTokenizer.from_pretrained` class method.
|
||||||
class method.
|
|
||||||
|
|
||||||
This class cannot be instantiated using `__init__()` (throw an error).
|
This class cannot be instantiated directly using ``__init__()`` (throws an error).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -134,45 +133,57 @@ class AutoTokenizer:
|
|||||||
@classmethod
|
@classmethod
|
||||||
@replace_list_option_in_docstrings(SLOW_TOKENIZER_MAPPING)
|
@replace_list_option_in_docstrings(SLOW_TOKENIZER_MAPPING)
|
||||||
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
|
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
|
||||||
r"""Instantiate one of the tokenizer classes of the library
|
r"""
|
||||||
from a pre-trained model vocabulary.
|
Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.
|
||||||
|
|
||||||
The tokenizer class to instantiate is selected
|
The tokenizer class to instantiate is selected based on the :obj:`model_type` property of the config object
|
||||||
based on the `model_type` property of the config object, or when it's missing,
|
(either passed as an argument or loaded from :obj:`pretrained_model_name_or_path` if possible), or when it's
|
||||||
falling back to using pattern matching on the `pretrained_model_name_or_path` string:
|
missing, by falling back to using pattern matching on :obj:`pretrained_model_name_or_path`:
|
||||||
|
|
||||||
List options
|
List options
|
||||||
|
|
||||||
Params:
|
Params:
|
||||||
pretrained_model_name_or_path: either:
|
pretrained_model_name_or_path (:obj:`str`):
|
||||||
|
Can be either:
|
||||||
|
|
||||||
- a string with the `shortcut name` of a predefined tokenizer to load from cache or download, e.g.: ``bert-base-uncased``.
|
- A string with the `shortcut name` of a predefined tokenizer to load from cache or download, e.g.,
|
||||||
- a string with the `identifier name` of a predefined tokenizer that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
|
``bert-base-uncased``.
|
||||||
- a path to a `directory` containing vocabulary files required by the tokenizer, for instance saved using the :func:`~transformers.PreTrainedTokenizer.save_pretrained` method, e.g.: ``./my_model_directory/``.
|
- A string with the `identifier name` of a predefined tokenizer that was user-uploaded to our S3,
|
||||||
- (not applicable to all derived classes) a path or url to a single saved vocabulary file if and only if the tokenizer only requires a single vocabulary file (e.g. Bert, XLNet), e.g.: ``./my_model_directory/vocab.txt``.
|
e.g., ``dbmdz/bert-base-german-cased``.
|
||||||
|
- A path to a `directory` containing vocabulary files required by the tokenizer, for instance saved
|
||||||
cache_dir: (`optional`) string:
|
using the :func:`~transformers.PreTrainedTokenizer.save_pretrained` method, e.g.,
|
||||||
Path to a directory in which a downloaded predefined tokenizer vocabulary files should be cached if the standard cache should not be used.
|
``./my_model_directory/``.
|
||||||
|
- A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
|
||||||
force_download: (`optional`) boolean, default False:
|
single vocabulary file (like Bert or XLNet), e.g., ``./my_model_directory/vocab.txt``.
|
||||||
Force to (re-)download the vocabulary files and override the cached versions if they exists.
|
(Not applicable to all derived classes)
|
||||||
|
inputs (additional positional arguments, `optional`):
|
||||||
resume_download: (`optional`) boolean, default False:
|
Will be passed along to the Tokenizer ``__init__()`` method.
|
||||||
Do not delete incompletely recieved file. Attempt to resume the download if such a file exists.
|
config (:class:`~transformers.PretrainedConfig`, `optional`):
|
||||||
|
The configuration object used to determine the tokenizer class to instantiate.
|
||||||
proxies: (`optional`) dict, default None:
|
cache_dir (:obj:`str`, `optional`):
|
||||||
A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
|
Path to a directory in which a downloaded pretrained model configuration should be cached if the
|
||||||
The proxies are used on each request.
|
standard cache should not be used.
|
||||||
|
force_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||||
use_fast: (`optional`) boolean, default False:
|
Whether or not to force the (re-)download of the model weights and configuration files and override the
|
||||||
Indicate if transformers should try to load the fast version of the tokenizer (True) or use the Python one (False).
|
cached versions if they exist.
|
||||||
|
resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||||
inputs: (`optional`) positional arguments: will be passed to the Tokenizer ``__init__`` method.
|
Whether or not to delete incompletely received files. Will attempt to resume the download if such a
|
||||||
|
file exists.
|
||||||
kwargs: (`optional`) keyword arguments: will be passed to the Tokenizer ``__init__`` method. Can be used to set special tokens like ``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``, ``pad_token``, ``cls_token``, ``mask_token``, ``additional_special_tokens``. See parameters in the doc string of :class:`~transformers.PreTrainedTokenizer` for details.
|
proxies (:obj:`Dict[str, str]`, `optional`):
|
||||||
|
A dictionary of proxy servers to use by protocol or endpoint, e.g.,
|
||||||
|
:obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
|
||||||
|
request.
|
||||||
|
use_fast (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||||
|
Whether or not to try to load the fast version of the tokenizer.
|
||||||
|
kwargs (additional keyword arguments, `optional`):
|
||||||
|
Will be passed to the Tokenizer ``__init__()`` method. Can be used to set special tokens like
|
||||||
|
``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``, ``pad_token``, ``cls_token``,
|
||||||
|
``mask_token``, ``additional_special_tokens``. See the parameters of ``__init__()`` for more details.
|
||||||
|
|
||||||
Examples::
|
Examples::
|
||||||
|
|
||||||
|
from transformers import AutoTokenizer
|
||||||
|
|
||||||
# Download vocabulary from S3 and cache.
|
# Download vocabulary from S3 and cache.
|
||||||
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
|
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
|
||||||
|
|
||||||
@@ -208,7 +219,10 @@ class AutoTokenizer:
|
|||||||
if isinstance(config, EncoderDecoderConfig):
|
if isinstance(config, EncoderDecoderConfig):
|
||||||
if type(config.decoder) is not type(config.encoder): # noqa: E721
|
if type(config.decoder) is not type(config.encoder): # noqa: E721
|
||||||
logger.warn(
|
logger.warn(
|
||||||
f"The encoder model config class: {config.encoder.__class__} is different from the decoder model config class: {config.decoder.__class}. It is not recommended to use the `AutoTokenizer.from_pretrained(..)` method in this case. Please use the encoder and decoder specific tokenizer classes."
|
f"The encoder model config class: {config.encoder.__class__} is different from the decoder model "
|
||||||
|
f"config class: {config.decoder.__class}. It is not recommended to use the "
|
||||||
|
"`AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder "
|
||||||
|
"specific tokenizer classes."
|
||||||
)
|
)
|
||||||
config = config.encoder
|
config = config.encoder
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user