fill_mask helper (#2576)

* fill_mask helper
* [poc] FillMaskPipeline
* Revert "[poc] FillMaskPipeline"
  This reverts commit 67eeea55b0f97b46c2b828de0f4ee97d87338335.
* Revert "fill_mask helper"
  This reverts commit cacc17b884e14bb6b07989110ffe884ad9e36eaa.
* README: clarify that Pipelines can also do text-classification
  cf. question at the AI&ML meetup last week, @mfuntowicz
* Fix test: test feature-extraction pipeline
* Test tweaks
* Slight refactor of existing pipeline (in preparation of new FillMaskPipeline)
* Extraneous doc
* More robust way of doing this
  @mfuntowicz as we don't rely on the model name anymore (see AutoConfig)
* Also add RobertaConfig as a quickfix for wrong token_type_ids
* cs
* [BIG] FillMaskPipeline
2020-01-30 18:15:42 -05:00
parent b43cb09aaa
commit 9fa836a73f
4 changed files with 201 additions and 18 deletions
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -1,7 +1,8 @@
 import unittest
-from typing import Iterable
+from typing import Iterable, List, Optional

 from transformers import pipeline
+from transformers.pipelines import Pipeline

 from .utils import require_tf, require_torch

@@ -62,9 +63,25 @@ TEXT_CLASSIF_FINETUNED_MODELS = {
    )
 }

+FILL_MASK_FINETUNED_MODELS = {
+    ("distilroberta-base", "distilroberta-base", None),
+}
+
+TF_FILL_MASK_FINETUNED_MODELS = {
+    ("distilroberta-base", "distilroberta-base", None),
+}
+

 class MonoColumnInputTestCase(unittest.TestCase):
-    def _test_mono_column_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
+    def _test_mono_column_pipeline(
+        self,
+        nlp: Pipeline,
+        valid_inputs: List,
+        invalid_inputs: List,
+        output_keys: Iterable[str],
+        expected_multi_result: Optional[List] = None,
+        expected_check_keys: Optional[List[str]] = None,
+    ):
        self.assertIsNotNone(nlp)

        mono_result = nlp(valid_inputs[0])
@@ -81,6 +98,13 @@ class MonoColumnInputTestCase(unittest.TestCase):
        self.assertIsInstance(multi_result, list)
        self.assertIsInstance(multi_result[0], (dict, list))

+        if expected_multi_result is not None:
+            for result, expect in zip(multi_result, expected_multi_result):
+                for key in expected_check_keys or []:
+                    self.assertEqual(
+                        set([o[key] for o in result]), set([o[key] for o in expect]),
+                    )
+
        if isinstance(multi_result[0], list):
            multi_result = multi_result[0]

@@ -110,7 +134,7 @@ class MonoColumnInputTestCase(unittest.TestCase):

    @require_torch
    def test_sentiment_analysis(self):
-        mandatory_keys = {"label"}
+        mandatory_keys = {"label", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
@@ -119,7 +143,7 @@ class MonoColumnInputTestCase(unittest.TestCase):

    @require_tf
    def test_tf_sentiment_analysis(self):
-        mandatory_keys = {"label"}
+        mandatory_keys = {"label", "score"}
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_TEXT_CLASSIF_FINETUNED_MODELS:
@@ -127,21 +151,87 @@ class MonoColumnInputTestCase(unittest.TestCase):
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)

    @require_torch
-    def test_features_extraction(self):
+    def test_feature_extraction(self):
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
-            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
+            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})

    @require_tf
-    def test_tf_features_extraction(self):
+    def test_tf_feature_extraction(self):
        valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
        invalid_inputs = [None]
        for tokenizer, model, config in TF_FEATURE_EXTRACT_FINETUNED_MODELS:
-            nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
+            nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
            self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})

+    @require_torch
+    def test_fill_mask(self):
+        mandatory_keys = {"sequence", "score", "token"}
+        valid_inputs = [
+            "My name is <mask>",
+            "The largest city in France is <mask>",
+        ]
+        invalid_inputs = [None]
+        expected_multi_result = [
+            [
+                {"score": 0.008698059245944023, "sequence": "<s>My name is John</s>", "token": 610},
+                {"score": 0.007750614080578089, "sequence": "<s>My name is Chris</s>", "token": 1573},
+            ],
+            [
+                {"score": 0.2721288502216339, "sequence": "<s>The largest city in France is Paris</s>", "token": 2201},
+                {
+                    "score": 0.19764970242977142,
+                    "sequence": "<s>The largest city in France is Lyon</s>",
+                    "token": 12790,
+                },
+            ],
+        ]
+        for tokenizer, model, config in FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, topk=2)
+            self._test_mono_column_pipeline(
+                nlp,
+                valid_inputs,
+                invalid_inputs,
+                mandatory_keys,
+                expected_multi_result=expected_multi_result,
+                expected_check_keys=["sequence"],
+            )
+
+    @require_tf
+    def test_tf_fill_mask(self):
+        mandatory_keys = {"sequence", "score", "token"}
+        valid_inputs = [
+            "My name is <mask>",
+            "The largest city in France is <mask>",
+        ]
+        invalid_inputs = [None]
+        expected_multi_result = [
+            [
+                {"score": 0.008698059245944023, "sequence": "<s>My name is John</s>", "token": 610},
+                {"score": 0.007750614080578089, "sequence": "<s>My name is Chris</s>", "token": 1573},
+            ],
+            [
+                {"score": 0.2721288502216339, "sequence": "<s>The largest city in France is Paris</s>", "token": 2201},
+                {
+                    "score": 0.19764970242977142,
+                    "sequence": "<s>The largest city in France is Lyon</s>",
+                    "token": 12790,
+                },
+            ],
+        ]
+        for tokenizer, model, config in TF_FILL_MASK_FINETUNED_MODELS:
+            nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, topk=2)
+            self._test_mono_column_pipeline(
+                nlp,
+                valid_inputs,
+                invalid_inputs,
+                mandatory_keys,
+                expected_multi_result=expected_multi_result,
+                expected_check_keys=["sequence"],
+            )
+

 class MultiColumnInputTestCase(unittest.TestCase):
    def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):