Fix default behaviour in TextClassificationPipeline for regression problem type (#34066)

* update code * update docstrings * update tests
2024-10-15 17:36:20 +05:30
parent 4de1bdbf63
commit 5ee9e786d1
2 changed files with 14 additions and 3 deletions
--- a/src/transformers/pipelines/text_classification.py
+++ b/src/transformers/pipelines/text_classification.py
@@ -40,7 +40,8 @@ class ClassificationFunction(ExplicitEnum):
            The function to apply to the model outputs in order to retrieve the scores. Accepts four different values:
            - `"default"`: if the model has a single label, will apply the sigmoid function on the output. If the model
-              has several labels, will apply the softmax function on the output.
+              has several labels, will apply the softmax function on the output. In case of regression tasks, will not
              apply any function on the output.
            - `"sigmoid"`: Applies the sigmoid function on the output.
            - `"softmax"`: Applies the softmax function on the output.
            - `"none"`: Does not apply any function on the output.""",
@@ -69,7 +70,8 @@ class TextClassificationPipeline(Pipeline):
    `"sentiment-analysis"` (for classifying sequences according to positive or negative sentiments).
    If multiple classification labels are available (`model.config.num_labels >= 2`), the pipeline will run a softmax
-    over the results. If there is a single label, the pipeline will run a sigmoid over the result.
+    over the results. If there is a single label, the pipeline will run a sigmoid over the result. In case of regression
    tasks (`model.config.problem_type == "regression"`), will not apply any function on the output.
    The models that this pipeline can use are models that have been fine-tuned on a sequence classification task. See
    the up-to-date list of available models on
@@ -135,6 +137,7 @@ class TextClassificationPipeline(Pipeline):
                If this argument is not specified, then it will apply the following functions according to the number
                of labels:
                - If problem type is regression, will not apply any function on the output.
                - If the model has a single label, will apply the sigmoid function on the output.
                - If the model has several labels, will apply the softmax function on the output.
@@ -192,7 +195,9 @@ class TextClassificationPipeline(Pipeline):
        # the more natural result containing the list.
        # Default value before `set_parameters`
        if function_to_apply is None:
-            if self.model.config.problem_type == "multi_label_classification" or self.model.config.num_labels == 1:
+            if self.model.config.problem_type == "regression":
                function_to_apply = ClassificationFunction.NONE
            elif self.model.config.problem_type == "multi_label_classification" or self.model.config.num_labels == 1:
                function_to_apply = ClassificationFunction.SIGMOID
            elif self.model.config.problem_type == "single_label_classification" or self.model.config.num_labels > 1:
                function_to_apply = ClassificationFunction.SOFTMAX
--- a/tests/pipelines/test_pipelines_text_classification.py
+++ b/tests/pipelines/test_pipelines_text_classification.py
@@ -108,6 +108,12 @@ class TextClassificationPipelineTests(unittest.TestCase):
            ],
        )
        # Do not apply any function to output for regression tasks
        # hack: changing problem_type artifically (so keep this test at last)
        text_classifier.model.config.problem_type = "regression"
        outputs = text_classifier("This is great !")
        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.01}])
    @require_torch
    def test_accepts_torch_device(self):
        text_classifier = pipeline(