Patch: v2.4.1

Flaubert auto tokenizer + tests
cc @julien-c
2020-01-31 14:55:33 -05:00 · 2020-01-31 14:16:52 -05:00 · 2020-01-31 12:05:48 -05:00 · 2020-01-31 12:05:15 -05:00 · 2020-01-31 11:49:32 -05:00 · 2020-01-31 10:58:49 -05:00
11 changed files with 69 additions and 14 deletions
--- a/.circleci/deploy.sh
+++ b/.circleci/deploy.sh
@@ -3,7 +3,7 @@ cd docs
 function deploy_doc(){
 	echo "Creating doc at commit $1 and pushing to folder $2"
 	git checkout $1
-	if [ ! -z "$2" ] 
+	if [ ! -z "$2" ]
 	then
 		if [ -d "$dir/$2" ]; then
 			echo "Directory" $2 "already exists"
@@ -17,7 +17,7 @@ function deploy_doc(){
 	fi
 }

-deploy_doc "master" 
+deploy_doc "master"
 deploy_doc "b33a385" v1.0.0
 deploy_doc "fe02e45" v1.1.0
 deploy_doc "89fd345" v1.2.0
@@ -25,3 +25,4 @@ deploy_doc "fc9faa8" v2.0.0
 deploy_doc "3ddce1d" v2.1.1
 deploy_doc "3616209" v2.2.0
 deploy_doc "d0f8b9a" v2.3.0
+deploy_doc "6664ea9" v2.4.0
--- a/README.md
+++ b/README.md
@@ -60,7 +60,7 @@ Choose the right framework for every part of a model's lifetime
 | [Quick tour: Share your models ](#Quick-tour-of-model-sharing) | Upload and share your fine-tuned models with the community |
 | [Migrating from pytorch-transformers to transformers](#Migrating-from-pytorch-transformers-to-transformers) | Migrating your code from pytorch-transformers to transformers |
 | [Migrating from pytorch-pretrained-bert to pytorch-transformers](#Migrating-from-pytorch-pretrained-bert-to-transformers) | Migrating your code from pytorch-pretrained-bert to transformers |
-| [Documentation][(v2.3.0)](https://huggingface.co/transformers/v2.3.0)[(v2.2.0/v2.2.1/v2.2.2)](https://huggingface.co/transformers/v2.2.0) [(v2.1.1)](https://huggingface.co/transformers/v2.1.1) [(v2.0.0)](https://huggingface.co/transformers/v2.0.0) [(v1.2.0)](https://huggingface.co/transformers/v1.2.0) [(v1.1.0)](https://huggingface.co/transformers/v1.1.0) [(v1.0.0)](https://huggingface.co/transformers/v1.0.0) [(master)](https://huggingface.co/transformers) | Full API documentation and more |
+| [Documentation][(v2.4.0)](https://huggingface.co/transformers/v2.4.0)[(v2.3.0)](https://huggingface.co/transformers/v2.3.0)[(v2.2.0/v2.2.1/v2.2.2)](https://huggingface.co/transformers/v2.2.0) [(v2.1.1)](https://huggingface.co/transformers/v2.1.1) [(v2.0.0)](https://huggingface.co/transformers/v2.0.0) [(v1.2.0)](https://huggingface.co/transformers/v1.2.0) [(v1.1.0)](https://huggingface.co/transformers/v1.1.0) [(v1.0.0)](https://huggingface.co/transformers/v1.0.0) [(master)](https://huggingface.co/transformers) | Full API documentation and more |

 ## Installation

--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -26,7 +26,7 @@ author = u'huggingface'
 # The short X.Y version
 version = u''
 # The full version, including alpha/beta/rc tags
-release = u'2.4.0'
+release = u'2.4.1'


 # -- General configuration ---------------------------------------------------
--- a/examples/run_generation.py
+++ b/examples/run_generation.py
@@ -221,6 +221,7 @@ def main():
        top_k=args.k,
        top_p=args.p,
        repetition_penalty=args.repetition_penalty,
+        do_sample=True,
    )

    # Batch size == 1. to add more examples please use num_return_sequences > 1
--- a/setup.py
+++ b/setup.py
@@ -23,6 +23,8 @@ To create the package for pypi.

   twine upload dist/* -r pypitest
   (pypi suggest using twine as other methods upload files via plaintext.)
+   If you have to specify the repository URL explicitly, use the following command instead:
+   twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/

   Check that you can install it in a virtualenv by running:
   pip install -i https://testpypi.python.org/pypi transformers
@@ -73,7 +75,7 @@ extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "sciki

 setup(
    name="transformers",
-    version="2.4.0",
+    version="2.4.1",
    author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
    author_email="thomas@huggingface.co",
    description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
--- a/src/transformers/init.py
+++ b/src/transformers/init.py
@@ -2,7 +2,7 @@
 # There's no way to ignore "F401 '...' imported but unused" warnings in this
 # module, but to preserve other warnings. So, don't check this module at all.

-__version__ = "2.4.0"
+__version__ = "2.4.1"

 # Work around to update TensorFlow's absl.logging threshold which alters the
 # default Python logging output behavior when present.
--- a/src/transformers/configuration_flaubert.py
+++ b/src/transformers/configuration_flaubert.py
@@ -50,8 +50,8 @@ class FlaubertConfig(XLMConfig):
                Probability to drop layers during training (Fan et al., Reducing Transformer Depth on Demand
                with Structured Dropout. ICLR 2020)
            vocab_size (:obj:`int`, optional, defaults to 30145):
-                Vocabulary size of the XLM model. Defines the different tokens that
-                can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.XLMModel`.
+                Vocabulary size of the Flaubert model. Defines the different tokens that
+                can be represented by the `input_ids` passed to the forward method of :class:`~transformers.FlaubertModel`.
            emb_dim (:obj:`int`, optional, defaults to 2048):
                Dimensionality of the encoder layers and the pooler layer.
            n_layer (:obj:`int`, optional, defaults to 12):
--- a/src/transformers/modeling_auto.py
+++ b/src/transformers/modeling_auto.py
@@ -129,8 +129,8 @@ ALL_PRETRAINED_MODEL_ARCHIVE_MAP = dict(
        ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
        CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
        T5_PRETRAINED_MODEL_ARCHIVE_MAP,
-        XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
        FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
    ]
    for key, value, in pretrained_map.items()
 )
@@ -148,9 +148,9 @@ MODEL_MAPPING = OrderedDict(
        (GPT2Config, GPT2Model),
        (TransfoXLConfig, TransfoXLModel),
        (XLNetConfig, XLNetModel),
+        (FlaubertConfig, FlaubertModel),
        (XLMConfig, XLMModel),
        (CTRLConfig, CTRLModel),
-        (FlaubertConfig, FlaubertModel),
    ]
 )

@@ -167,9 +167,9 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
        (GPT2Config, GPT2LMHeadModel),
        (TransfoXLConfig, TransfoXLLMHeadModel),
        (XLNetConfig, XLNetLMHeadModel),
+        (FlaubertConfig, FlaubertWithLMHeadModel),
        (XLMConfig, XLMWithLMHeadModel),
        (CTRLConfig, CTRLLMHeadModel),
-        (FlaubertConfig, FlaubertWithLMHeadModel),
    ]
 )

@@ -186,9 +186,9 @@ MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
        (GPT2Config, GPT2LMHeadModel),
        (TransfoXLConfig, TransfoXLLMHeadModel),
        (XLNetConfig, XLNetLMHeadModel),
+        (FlaubertConfig, FlaubertWithLMHeadModel),
        (XLMConfig, XLMWithLMHeadModel),
        (CTRLConfig, CTRLLMHeadModel),
-        (FlaubertConfig, FlaubertWithLMHeadModel),
    ]
 )

@@ -201,8 +201,8 @@ MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
        (RobertaConfig, RobertaForSequenceClassification),
        (BertConfig, BertForSequenceClassification),
        (XLNetConfig, XLNetForSequenceClassification),
-        (XLMConfig, XLMForSequenceClassification),
        (FlaubertConfig, FlaubertForSequenceClassification),
+        (XLMConfig, XLMForSequenceClassification),
    ]
 )

@@ -213,8 +213,8 @@ MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
        (RobertaConfig, RobertaForQuestionAnswering),
        (BertConfig, BertForQuestionAnswering),
        (XLNetConfig, XLNetForQuestionAnswering),
-        (XLMConfig, XLMForQuestionAnswering),
        (FlaubertConfig, FlaubertForQuestionAnswering),
+        (XLMConfig, XLMForQuestionAnswering),
    ]
 )

--- a/src/transformers/tokenization_auto.py
+++ b/src/transformers/tokenization_auto.py
@@ -25,6 +25,7 @@ from .configuration_auto import (
    CamembertConfig,
    CTRLConfig,
    DistilBertConfig,
+    FlaubertConfig,
    GPT2Config,
    OpenAIGPTConfig,
    RobertaConfig,
@@ -41,6 +42,7 @@ from .tokenization_bert_japanese import BertJapaneseTokenizer
 from .tokenization_camembert import CamembertTokenizer
 from .tokenization_ctrl import CTRLTokenizer
 from .tokenization_distilbert import DistilBertTokenizer
+from .tokenization_flaubert import FlaubertTokenizer
 from .tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_openai import OpenAIGPTTokenizer
 from .tokenization_roberta import RobertaTokenizer
@@ -67,6 +69,7 @@ TOKENIZER_MAPPING = OrderedDict(
        (GPT2Config, GPT2Tokenizer),
        (TransfoXLConfig, TransfoXLTokenizer),
        (XLNetConfig, XLNetTokenizer),
+        (FlaubertConfig, FlaubertTokenizer),
        (XLMConfig, XLMTokenizer),
        (CTRLConfig, CTRLTokenizer),
    ]
--- a/tests/test_modeling_auto.py
+++ b/tests/test_modeling_auto.py
@@ -39,6 +39,14 @@ if is_torch_available():
        BertForQuestionAnswering,
    )
    from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
+    from transformers.modeling_auto import (
+        MODEL_MAPPING,
+        MODEL_FOR_PRETRAINING_MAPPING,
+        MODEL_FOR_QUESTION_ANSWERING_MAPPING,
+        MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
+        MODEL_WITH_LM_HEAD_MAPPING,
+    )


@require_torch
@@ -127,3 +135,26 @@ class AutoModelTest(unittest.TestCase):
        self.assertIsInstance(model, RobertaForMaskedLM)
        self.assertEqual(model.num_parameters(), 14830)
        self.assertEqual(model.num_parameters(only_trainable=True), 14830)
+
+    def test_parents_and_children_in_mappings(self):
+        # Test that the children are placed before the parents in the mappings, as the `isinstance` will be triggered
+        # by the parents and will return the wrong configuration type when using auto models
+
+        mappings = (
+            MODEL_MAPPING,
+            MODEL_FOR_PRETRAINING_MAPPING,
+            MODEL_FOR_QUESTION_ANSWERING_MAPPING,
+            MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+            MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
+            MODEL_WITH_LM_HEAD_MAPPING,
+        )
+
+        for mapping in mappings:
+            mapping = tuple(mapping.items())
+            for index, (child_config, child_model) in enumerate(mapping[1:]):
+                for parent_config, parent_model in mapping[: index + 1]:
+                    with self.subTest(
+                        msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
+                    ):
+                        self.assertFalse(issubclass(child_config, parent_config))
+                        self.assertFalse(issubclass(child_model, parent_model))
--- a/tests/test_tokenization_auto.py
+++ b/tests/test_tokenization_auto.py
@@ -25,6 +25,7 @@ from transformers import (
    GPT2Tokenizer,
    RobertaTokenizer,
 )
+from transformers.tokenization_auto import TOKENIZER_MAPPING

 from .utils import DUMMY_UNKWOWN_IDENTIFIER, SMALL_MODEL_IDENTIFIER, slow  # noqa: F401

@@ -70,3 +71,19 @@ class AutoTokenizerTest(unittest.TestCase):
        for tokenizer_class in [BertTokenizer, AutoTokenizer]:
            with self.assertRaises(EnvironmentError):
                _ = tokenizer_class.from_pretrained("julien-c/herlolip-not-exists")
+
+    def test_parents_and_children_in_mappings(self):
+        # Test that the children are placed before the parents in the mappings, as the `isinstance` will be triggered
+        # by the parents and will return the wrong configuration type when using auto models
+
+        mappings = (TOKENIZER_MAPPING,)
+
+        for mapping in mappings:
+            mapping = tuple(mapping.items())
+            for index, (child_config, child_model) in enumerate(mapping[1:]):
+                for parent_config, parent_model in mapping[: index + 1]:
+                    with self.subTest(
+                        msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
+                    ):
+                        self.assertFalse(issubclass(child_config, parent_config))
+                        self.assertFalse(issubclass(child_model, parent_model))
Author	SHA1	Message	Date
Lysandre	d426b58b9e	Patch: v2.4.1	2020-01-31 14:55:33 -05:00
Lysandre	1e82cd8457	Flaubert auto tokenizer + tests cc @julien-c	2020-01-31 14:16:52 -05:00
Lysandre	d18d47be67	run_generation style	2020-01-31 12:05:48 -05:00
Lysandre	ff6f1492e8	FlauBERT load in AutoModel The FlauBERT configuration file inherits from XLMConfig, and is recognized as such when loading from AutoModels as the XLMConfig is checked before the FlaubertConfig. Changing the order solves this problem, but a test should be added.	2020-01-31 12:05:15 -05:00
Lysandre	7365f01d43	do_sample should be set to True in run_generation.py	2020-01-31 11:49:32 -05:00
Arnaud	3a21d6da6b	Typo on markdown link in README.md	2020-01-31 10:58:49 -05:00
Lysandre	0aa40e9569	v2.4.0 documentation	2020-01-31 09:55:34 -05:00
Lysandre	8036ceb7c5	Update commands for pypi test	2020-01-31 09:48:15 -05:00