fill_mask helper (#2576)
* fill_mask helper * [poc] FillMaskPipeline * Revert "[poc] FillMaskPipeline" This reverts commit 67eeea55b0f97b46c2b828de0f4ee97d87338335. * Revert "fill_mask helper" This reverts commit cacc17b884e14bb6b07989110ffe884ad9e36eaa. * README: clarify that Pipelines can also do text-classification cf. question at the AI&ML meetup last week, @mfuntowicz * Fix test: test feature-extraction pipeline * Test tweaks * Slight refactor of existing pipeline (in preparation of new FillMaskPipeline) * Extraneous doc * More robust way of doing this @mfuntowicz as we don't rely on the model name anymore (see AutoConfig) * Also add RobertaConfig as a quickfix for wrong token_type_ids * cs * [BIG] FillMaskPipeline
This commit is contained in:
@@ -521,8 +521,9 @@ You can create `Pipeline` objects for the following down-stream tasks:
|
|||||||
- `feature-extraction`: Generates a tensor representation for the input sequence
|
- `feature-extraction`: Generates a tensor representation for the input sequence
|
||||||
- `ner`: Generates named entity mapping for each word in the input sequence.
|
- `ner`: Generates named entity mapping for each word in the input sequence.
|
||||||
- `sentiment-analysis`: Gives the polarity (positive / negative) of the whole input sequence.
|
- `sentiment-analysis`: Gives the polarity (positive / negative) of the whole input sequence.
|
||||||
- `question-answering`: Provided some context and a question refering to the context, it will extract the answer to the question
|
- `text-classification`: Initialize a `TextClassificationPipeline` directly, or see `sentiment-analysis` for an example.
|
||||||
in the context.
|
- `question-answering`: Provided some context and a question referring to the context, it will extract the answer to the question in the context.
|
||||||
|
- `fill-mask`: Takes an input sequence containing a masked token (e.g. `<mask>`) and returns a list of the most probable filled sequences, with their probabilities.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from transformers import pipeline
|
from transformers import pipeline
|
||||||
|
|||||||
@@ -93,6 +93,7 @@ from .modeling_tf_pytorch_utils import (
|
|||||||
from .pipelines import (
|
from .pipelines import (
|
||||||
CsvPipelineDataFormat,
|
CsvPipelineDataFormat,
|
||||||
FeatureExtractionPipeline,
|
FeatureExtractionPipeline,
|
||||||
|
FillMaskPipeline,
|
||||||
JsonPipelineDataFormat,
|
JsonPipelineDataFormat,
|
||||||
NerPipeline,
|
NerPipeline,
|
||||||
PipedPipelineDataFormat,
|
PipedPipelineDataFormat,
|
||||||
|
|||||||
@@ -28,7 +28,10 @@ from typing import Dict, List, Optional, Tuple, Union
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig
|
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig
|
||||||
|
from .configuration_distilbert import DistilBertConfig
|
||||||
|
from .configuration_roberta import RobertaConfig
|
||||||
from .configuration_utils import PretrainedConfig
|
from .configuration_utils import PretrainedConfig
|
||||||
|
from .configuration_xlm import XLMConfig
|
||||||
from .data import SquadExample, squad_convert_examples_to_features
|
from .data import SquadExample, squad_convert_examples_to_features
|
||||||
from .file_utils import is_tf_available, is_torch_available
|
from .file_utils import is_tf_available, is_torch_available
|
||||||
from .modelcard import ModelCard
|
from .modelcard import ModelCard
|
||||||
@@ -44,6 +47,7 @@ if is_tf_available():
|
|||||||
TFAutoModelForSequenceClassification,
|
TFAutoModelForSequenceClassification,
|
||||||
TFAutoModelForQuestionAnswering,
|
TFAutoModelForQuestionAnswering,
|
||||||
TFAutoModelForTokenClassification,
|
TFAutoModelForTokenClassification,
|
||||||
|
TFAutoModelWithLMHead,
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_torch_available():
|
if is_torch_available():
|
||||||
@@ -53,6 +57,7 @@ if is_torch_available():
|
|||||||
AutoModelForSequenceClassification,
|
AutoModelForSequenceClassification,
|
||||||
AutoModelForQuestionAnswering,
|
AutoModelForQuestionAnswering,
|
||||||
AutoModelForTokenClassification,
|
AutoModelForTokenClassification,
|
||||||
|
AutoModelWithLMHead,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -64,7 +69,7 @@ def get_framework(model=None):
|
|||||||
If both frameworks are installed and no specific model is provided, defaults to using PyTorch.
|
If both frameworks are installed and no specific model is provided, defaults to using PyTorch.
|
||||||
"""
|
"""
|
||||||
if is_tf_available() and is_torch_available() and model is not None and not isinstance(model, str):
|
if is_tf_available() and is_torch_available() and model is not None and not isinstance(model, str):
|
||||||
# Both framework are available but the use supplied a model class instance.
|
# Both framework are available but the user supplied a model class instance.
|
||||||
# Try to guess which framework to use from the model classname
|
# Try to guess which framework to use from the model classname
|
||||||
framework = "tf" if model.__class__.__name__.startswith("TF") else "pt"
|
framework = "tf" if model.__class__.__name__.startswith("TF") else "pt"
|
||||||
elif not is_tf_available() and not is_torch_available():
|
elif not is_tf_available() and not is_torch_available():
|
||||||
@@ -364,7 +369,6 @@ class Pipeline(_ScikitCompat):
|
|||||||
def predict(self, X):
|
def predict(self, X):
|
||||||
"""
|
"""
|
||||||
Scikit / Keras interface to transformers' pipelines. This method will forward to __call__().
|
Scikit / Keras interface to transformers' pipelines. This method will forward to __call__().
|
||||||
Se
|
|
||||||
"""
|
"""
|
||||||
return self(X=X)
|
return self(X=X)
|
||||||
|
|
||||||
@@ -406,9 +410,8 @@ class Pipeline(_ScikitCompat):
|
|||||||
dict holding all the required parameters for model's forward
|
dict holding all the required parameters for model's forward
|
||||||
"""
|
"""
|
||||||
args = ["input_ids", "attention_mask"]
|
args = ["input_ids", "attention_mask"]
|
||||||
model_type = type(self.model).__name__.lower()
|
|
||||||
|
|
||||||
if "distilbert" not in model_type and "xlm" not in model_type:
|
if not isinstance(self.model.config, (DistilBertConfig, XLMConfig, RobertaConfig)):
|
||||||
args += ["token_type_ids"]
|
args += ["token_type_ids"]
|
||||||
|
|
||||||
# PR #1548 (CLI) There is an issue with attention_mask
|
# PR #1548 (CLI) There is an issue with attention_mask
|
||||||
@@ -420,7 +423,10 @@ class Pipeline(_ScikitCompat):
|
|||||||
else:
|
else:
|
||||||
return {k: [feature[k] for feature in features] for k in args}
|
return {k: [feature[k] for feature in features] for k in args}
|
||||||
|
|
||||||
def __call__(self, *texts, **kwargs):
|
def _parse_and_tokenize(self, *texts, **kwargs):
|
||||||
|
"""
|
||||||
|
Parse arguments and tokenize
|
||||||
|
"""
|
||||||
# Parse arguments
|
# Parse arguments
|
||||||
inputs = self._args_parser(*texts, **kwargs)
|
inputs = self._args_parser(*texts, **kwargs)
|
||||||
inputs = self.tokenizer.batch_encode_plus(
|
inputs = self.tokenizer.batch_encode_plus(
|
||||||
@@ -429,13 +435,19 @@ class Pipeline(_ScikitCompat):
|
|||||||
|
|
||||||
# Filter out features not available on specific models
|
# Filter out features not available on specific models
|
||||||
inputs = self.inputs_for_model(inputs)
|
inputs = self.inputs_for_model(inputs)
|
||||||
|
|
||||||
|
return inputs
|
||||||
|
|
||||||
|
def __call__(self, *texts, **kwargs):
|
||||||
|
inputs = self._parse_and_tokenize(*texts, **kwargs)
|
||||||
return self._forward(inputs)
|
return self._forward(inputs)
|
||||||
|
|
||||||
def _forward(self, inputs):
|
def _forward(self, inputs, return_tensors=False):
|
||||||
"""
|
"""
|
||||||
Internal framework specific forward dispatching.
|
Internal framework specific forward dispatching.
|
||||||
Args:
|
Args:
|
||||||
inputs: dict holding all the keyword arguments required by the model forward method.
|
inputs: dict holding all the keyword arguments required by the model forward method.
|
||||||
|
return_tensors: Whether to return native framework (pt/tf) tensors rather than numpy array.
|
||||||
Returns:
|
Returns:
|
||||||
Numpy array
|
Numpy array
|
||||||
"""
|
"""
|
||||||
@@ -449,7 +461,10 @@ class Pipeline(_ScikitCompat):
|
|||||||
inputs = self.ensure_tensor_on_device(**inputs)
|
inputs = self.ensure_tensor_on_device(**inputs)
|
||||||
predictions = self.model(**inputs)[0].cpu()
|
predictions = self.model(**inputs)[0].cpu()
|
||||||
|
|
||||||
return predictions.numpy()
|
if return_tensors:
|
||||||
|
return predictions
|
||||||
|
else:
|
||||||
|
return predictions.numpy()
|
||||||
|
|
||||||
|
|
||||||
class FeatureExtractionPipeline(Pipeline):
|
class FeatureExtractionPipeline(Pipeline):
|
||||||
@@ -491,6 +506,71 @@ class TextClassificationPipeline(Pipeline):
|
|||||||
return [{"label": self.model.config.id2label[item.argmax()], "score": item.max()} for item in scores]
|
return [{"label": self.model.config.id2label[item.argmax()], "score": item.max()} for item in scores]
|
||||||
|
|
||||||
|
|
||||||
|
class FillMaskPipeline(Pipeline):
|
||||||
|
"""
|
||||||
|
Masked language modeling prediction pipeline using ModelWithLMHead head.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
model,
|
||||||
|
tokenizer: PreTrainedTokenizer = None,
|
||||||
|
modelcard: ModelCard = None,
|
||||||
|
framework: Optional[str] = None,
|
||||||
|
args_parser: ArgumentHandler = None,
|
||||||
|
device: int = -1,
|
||||||
|
topk=5,
|
||||||
|
):
|
||||||
|
super().__init__(
|
||||||
|
model=model,
|
||||||
|
tokenizer=tokenizer,
|
||||||
|
modelcard=modelcard,
|
||||||
|
framework=framework,
|
||||||
|
args_parser=args_parser,
|
||||||
|
device=device,
|
||||||
|
binary_output=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.topk = topk
|
||||||
|
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
inputs = self._parse_and_tokenize(*args, **kwargs)
|
||||||
|
outputs = self._forward(inputs, return_tensors=True)
|
||||||
|
|
||||||
|
results = []
|
||||||
|
batch_size = outputs.shape[0] if self.framework == "tf" else outputs.size(0)
|
||||||
|
|
||||||
|
for i in range(batch_size):
|
||||||
|
input_ids = inputs["input_ids"][i]
|
||||||
|
result = []
|
||||||
|
|
||||||
|
if self.framework == "tf":
|
||||||
|
masked_index = tf.where(input_ids == self.tokenizer.mask_token_id).numpy().item()
|
||||||
|
logits = outputs[i, masked_index, :]
|
||||||
|
probs = tf.nn.softmax(logits)
|
||||||
|
topk = tf.math.top_k(probs, k=self.topk)
|
||||||
|
values, predictions = topk.values.numpy(), topk.indices.numpy()
|
||||||
|
else:
|
||||||
|
masked_index = (input_ids == self.tokenizer.mask_token_id).nonzero().item()
|
||||||
|
logits = outputs[i, masked_index, :]
|
||||||
|
probs = logits.softmax(dim=0)
|
||||||
|
values, predictions = probs.topk(self.topk)
|
||||||
|
|
||||||
|
for v, p in zip(values.tolist(), predictions.tolist()):
|
||||||
|
tokens = input_ids.numpy()
|
||||||
|
tokens[masked_index] = p
|
||||||
|
# Filter padding out:
|
||||||
|
tokens = tokens[np.where(tokens != self.tokenizer.pad_token_id)]
|
||||||
|
result.append({"sequence": self.tokenizer.decode(tokens), "score": v, "token": p})
|
||||||
|
|
||||||
|
# Append
|
||||||
|
results += [result]
|
||||||
|
|
||||||
|
if len(results) == 1:
|
||||||
|
return results[0]
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
class NerPipeline(Pipeline):
|
class NerPipeline(Pipeline):
|
||||||
"""
|
"""
|
||||||
Named Entity Recognition pipeline using ModelForTokenClassification head.
|
Named Entity Recognition pipeline using ModelForTokenClassification head.
|
||||||
@@ -523,7 +603,8 @@ class NerPipeline(Pipeline):
|
|||||||
self.ignore_labels = ignore_labels
|
self.ignore_labels = ignore_labels
|
||||||
|
|
||||||
def __call__(self, *texts, **kwargs):
|
def __call__(self, *texts, **kwargs):
|
||||||
inputs, answers = self._args_parser(*texts, **kwargs), []
|
inputs = self._args_parser(*texts, **kwargs)
|
||||||
|
answers = []
|
||||||
for sentence in inputs:
|
for sentence in inputs:
|
||||||
|
|
||||||
# Manage correct placement of the tensors
|
# Manage correct placement of the tensors
|
||||||
@@ -903,6 +984,16 @@ SUPPORTED_TASKS = {
|
|||||||
"tokenizer": "distilbert-base-uncased",
|
"tokenizer": "distilbert-base-uncased",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"fill-mask": {
|
||||||
|
"impl": FillMaskPipeline,
|
||||||
|
"tf": TFAutoModelWithLMHead if is_tf_available() else None,
|
||||||
|
"pt": AutoModelWithLMHead if is_torch_available() else None,
|
||||||
|
"default": {
|
||||||
|
"model": {"pt": "distilroberta-base", "tf": "distilroberta-base"},
|
||||||
|
"config": None,
|
||||||
|
"tokenizer": "distilroberta-base",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
import unittest
|
import unittest
|
||||||
from typing import Iterable
|
from typing import Iterable, List, Optional
|
||||||
|
|
||||||
from transformers import pipeline
|
from transformers import pipeline
|
||||||
|
from transformers.pipelines import Pipeline
|
||||||
|
|
||||||
from .utils import require_tf, require_torch
|
from .utils import require_tf, require_torch
|
||||||
|
|
||||||
@@ -62,9 +63,25 @@ TEXT_CLASSIF_FINETUNED_MODELS = {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FILL_MASK_FINETUNED_MODELS = {
|
||||||
|
("distilroberta-base", "distilroberta-base", None),
|
||||||
|
}
|
||||||
|
|
||||||
|
TF_FILL_MASK_FINETUNED_MODELS = {
|
||||||
|
("distilroberta-base", "distilroberta-base", None),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class MonoColumnInputTestCase(unittest.TestCase):
|
class MonoColumnInputTestCase(unittest.TestCase):
|
||||||
def _test_mono_column_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
|
def _test_mono_column_pipeline(
|
||||||
|
self,
|
||||||
|
nlp: Pipeline,
|
||||||
|
valid_inputs: List,
|
||||||
|
invalid_inputs: List,
|
||||||
|
output_keys: Iterable[str],
|
||||||
|
expected_multi_result: Optional[List] = None,
|
||||||
|
expected_check_keys: Optional[List[str]] = None,
|
||||||
|
):
|
||||||
self.assertIsNotNone(nlp)
|
self.assertIsNotNone(nlp)
|
||||||
|
|
||||||
mono_result = nlp(valid_inputs[0])
|
mono_result = nlp(valid_inputs[0])
|
||||||
@@ -81,6 +98,13 @@ class MonoColumnInputTestCase(unittest.TestCase):
|
|||||||
self.assertIsInstance(multi_result, list)
|
self.assertIsInstance(multi_result, list)
|
||||||
self.assertIsInstance(multi_result[0], (dict, list))
|
self.assertIsInstance(multi_result[0], (dict, list))
|
||||||
|
|
||||||
|
if expected_multi_result is not None:
|
||||||
|
for result, expect in zip(multi_result, expected_multi_result):
|
||||||
|
for key in expected_check_keys or []:
|
||||||
|
self.assertEqual(
|
||||||
|
set([o[key] for o in result]), set([o[key] for o in expect]),
|
||||||
|
)
|
||||||
|
|
||||||
if isinstance(multi_result[0], list):
|
if isinstance(multi_result[0], list):
|
||||||
multi_result = multi_result[0]
|
multi_result = multi_result[0]
|
||||||
|
|
||||||
@@ -110,7 +134,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
|
|||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
def test_sentiment_analysis(self):
|
def test_sentiment_analysis(self):
|
||||||
mandatory_keys = {"label"}
|
mandatory_keys = {"label", "score"}
|
||||||
valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
|
valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
|
||||||
invalid_inputs = [None]
|
invalid_inputs = [None]
|
||||||
for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
|
for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
|
||||||
@@ -119,7 +143,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
|
|||||||
|
|
||||||
@require_tf
|
@require_tf
|
||||||
def test_tf_sentiment_analysis(self):
|
def test_tf_sentiment_analysis(self):
|
||||||
mandatory_keys = {"label"}
|
mandatory_keys = {"label", "score"}
|
||||||
valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
|
valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
|
||||||
invalid_inputs = [None]
|
invalid_inputs = [None]
|
||||||
for tokenizer, model, config in TF_TEXT_CLASSIF_FINETUNED_MODELS:
|
for tokenizer, model, config in TF_TEXT_CLASSIF_FINETUNED_MODELS:
|
||||||
@@ -127,21 +151,87 @@ class MonoColumnInputTestCase(unittest.TestCase):
|
|||||||
self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
|
self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
def test_features_extraction(self):
|
def test_feature_extraction(self):
|
||||||
valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
|
valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
|
||||||
invalid_inputs = [None]
|
invalid_inputs = [None]
|
||||||
for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
|
for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
|
||||||
nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
|
nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
|
||||||
self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
|
self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
|
||||||
|
|
||||||
@require_tf
|
@require_tf
|
||||||
def test_tf_features_extraction(self):
|
def test_tf_feature_extraction(self):
|
||||||
valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
|
valid_inputs = ["HuggingFace is solving NLP one commit at a time.", "HuggingFace is based in New-York & Paris"]
|
||||||
invalid_inputs = [None]
|
invalid_inputs = [None]
|
||||||
for tokenizer, model, config in TF_FEATURE_EXTRACT_FINETUNED_MODELS:
|
for tokenizer, model, config in TF_FEATURE_EXTRACT_FINETUNED_MODELS:
|
||||||
nlp = pipeline(task="sentiment-analysis", model=model, config=config, tokenizer=tokenizer)
|
nlp = pipeline(task="feature-extraction", model=model, config=config, tokenizer=tokenizer)
|
||||||
self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
|
self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
|
||||||
|
|
||||||
|
@require_torch
|
||||||
|
def test_fill_mask(self):
|
||||||
|
mandatory_keys = {"sequence", "score", "token"}
|
||||||
|
valid_inputs = [
|
||||||
|
"My name is <mask>",
|
||||||
|
"The largest city in France is <mask>",
|
||||||
|
]
|
||||||
|
invalid_inputs = [None]
|
||||||
|
expected_multi_result = [
|
||||||
|
[
|
||||||
|
{"score": 0.008698059245944023, "sequence": "<s>My name is John</s>", "token": 610},
|
||||||
|
{"score": 0.007750614080578089, "sequence": "<s>My name is Chris</s>", "token": 1573},
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{"score": 0.2721288502216339, "sequence": "<s>The largest city in France is Paris</s>", "token": 2201},
|
||||||
|
{
|
||||||
|
"score": 0.19764970242977142,
|
||||||
|
"sequence": "<s>The largest city in France is Lyon</s>",
|
||||||
|
"token": 12790,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
]
|
||||||
|
for tokenizer, model, config in FILL_MASK_FINETUNED_MODELS:
|
||||||
|
nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, topk=2)
|
||||||
|
self._test_mono_column_pipeline(
|
||||||
|
nlp,
|
||||||
|
valid_inputs,
|
||||||
|
invalid_inputs,
|
||||||
|
mandatory_keys,
|
||||||
|
expected_multi_result=expected_multi_result,
|
||||||
|
expected_check_keys=["sequence"],
|
||||||
|
)
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
def test_tf_fill_mask(self):
|
||||||
|
mandatory_keys = {"sequence", "score", "token"}
|
||||||
|
valid_inputs = [
|
||||||
|
"My name is <mask>",
|
||||||
|
"The largest city in France is <mask>",
|
||||||
|
]
|
||||||
|
invalid_inputs = [None]
|
||||||
|
expected_multi_result = [
|
||||||
|
[
|
||||||
|
{"score": 0.008698059245944023, "sequence": "<s>My name is John</s>", "token": 610},
|
||||||
|
{"score": 0.007750614080578089, "sequence": "<s>My name is Chris</s>", "token": 1573},
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{"score": 0.2721288502216339, "sequence": "<s>The largest city in France is Paris</s>", "token": 2201},
|
||||||
|
{
|
||||||
|
"score": 0.19764970242977142,
|
||||||
|
"sequence": "<s>The largest city in France is Lyon</s>",
|
||||||
|
"token": 12790,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
]
|
||||||
|
for tokenizer, model, config in TF_FILL_MASK_FINETUNED_MODELS:
|
||||||
|
nlp = pipeline(task="fill-mask", model=model, config=config, tokenizer=tokenizer, topk=2)
|
||||||
|
self._test_mono_column_pipeline(
|
||||||
|
nlp,
|
||||||
|
valid_inputs,
|
||||||
|
invalid_inputs,
|
||||||
|
mandatory_keys,
|
||||||
|
expected_multi_result=expected_multi_result,
|
||||||
|
expected_check_keys=["sequence"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MultiColumnInputTestCase(unittest.TestCase):
|
class MultiColumnInputTestCase(unittest.TestCase):
|
||||||
def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
|
def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
|
||||||
|
|||||||
Reference in New Issue
Block a user