From 5ee9e786d115154c0c58dc961e39105a205ccac0 Mon Sep 17 00:00:00 2001
From: Subhalingam D <subhalingam.d@gmail.com>
Date: Tue, 15 Oct 2024 17:36:20 +0530
Subject: [PATCH] Fix default behaviour in TextClassificationPipeline for
 regression problem type (#34066)

* update code

* update docstrings

* update tests
---
 src/transformers/pipelines/text_classification.py     | 11 ++++++++---
 tests/pipelines/test_pipelines_text_classification.py |  6 ++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/transformers/pipelines/text_classification.py b/src/transformers/pipelines/text_classification.py
index 21ca70c2ac..dadb29c386 100644
--- a/src/transformers/pipelines/text_classification.py
+++ b/src/transformers/pipelines/text_classification.py
@@ -40,7 +40,8 @@ class ClassificationFunction(ExplicitEnum):
             The function to apply to the model outputs in order to retrieve the scores. Accepts four different values:
 
             - `"default"`: if the model has a single label, will apply the sigmoid function on the output. If the model
-              has several labels, will apply the softmax function on the output.
+              has several labels, will apply the softmax function on the output. In case of regression tasks, will not
+              apply any function on the output.
             - `"sigmoid"`: Applies the sigmoid function on the output.
             - `"softmax"`: Applies the softmax function on the output.
             - `"none"`: Does not apply any function on the output.""",
@@ -69,7 +70,8 @@ class TextClassificationPipeline(Pipeline):
     `"sentiment-analysis"` (for classifying sequences according to positive or negative sentiments).
 
     If multiple classification labels are available (`model.config.num_labels >= 2`), the pipeline will run a softmax
-    over the results. If there is a single label, the pipeline will run a sigmoid over the result.
+    over the results. If there is a single label, the pipeline will run a sigmoid over the result. In case of regression
+    tasks (`model.config.problem_type == "regression"`), the pipeline will not apply any function on the output.
 
     The models that this pipeline can use are models that have been fine-tuned on a sequence classification task. See
     the up-to-date list of available models on
@@ -135,6 +137,7 @@ class TextClassificationPipeline(Pipeline):
                 If this argument is not specified, then it will apply the following functions according to the number
                 of labels:
 
+                - If the problem type is regression, will not apply any function on the output.
                 - If the model has a single label, will apply the sigmoid function on the output.
                 - If the model has several labels, will apply the softmax function on the output.
 
@@ -192,7 +195,9 @@ class TextClassificationPipeline(Pipeline):
         # the more natural result containing the list.
         # Default value before `set_parameters`
         if function_to_apply is None:
-            if self.model.config.problem_type == "multi_label_classification" or self.model.config.num_labels == 1:
+            if self.model.config.problem_type == "regression":
+                function_to_apply = ClassificationFunction.NONE
+            elif self.model.config.problem_type == "multi_label_classification" or self.model.config.num_labels == 1:
                 function_to_apply = ClassificationFunction.SIGMOID
             elif self.model.config.problem_type == "single_label_classification" or self.model.config.num_labels > 1:
                 function_to_apply = ClassificationFunction.SOFTMAX
diff --git a/tests/pipelines/test_pipelines_text_classification.py b/tests/pipelines/test_pipelines_text_classification.py
index 1f3b31b858..23625f0d77 100644
--- a/tests/pipelines/test_pipelines_text_classification.py
+++ b/tests/pipelines/test_pipelines_text_classification.py
@@ -108,6 +108,12 @@ class TextClassificationPipelineTests(unittest.TestCase):
             ],
         )
 
+        # Do not apply any function to output for regression tasks
+        # hack: changing problem_type artificially (so keep this check at the end of the test)
+        text_classifier.model.config.problem_type = "regression"
+        outputs = text_classifier("This is great !")
+        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.01}])
+
     @require_torch
     def test_accepts_torch_device(self):
         text_classifier = pipeline(