Fix regression in regression (#11785)
* Fix regression in regression * Add test
This commit is contained in:
@@ -1037,7 +1037,10 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -1528,7 +1528,10 @@ class BertForSequenceClassification(BertPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -2671,7 +2671,10 @@ class BigBirdForSequenceClassification(BigBirdPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -1023,7 +1023,10 @@ class ConvBertForSequenceClassification(ConvBertPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -642,7 +642,10 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -964,7 +964,10 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -1298,7 +1298,10 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -1872,7 +1872,10 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -1279,7 +1279,10 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -2445,7 +2445,10 @@ class ReformerForSequenceClassification(ReformerPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -1178,7 +1178,10 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -798,7 +798,10 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -847,7 +847,10 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -1562,7 +1562,10 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
|
|||||||
|
|
||||||
if self.config.problem_type == "regression":
|
if self.config.problem_type == "regression":
|
||||||
loss_fct = MSELoss()
|
loss_fct = MSELoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels)
|
if self.num_labels == 1:
|
||||||
|
loss = loss_fct(logits.squeeze(), labels.squeeze())
|
||||||
|
else:
|
||||||
|
loss = loss_fct(logits, labels)
|
||||||
elif self.config.problem_type == "single_label_classification":
|
elif self.config.problem_type == "single_label_classification":
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import os.path
|
|||||||
import random
|
import random
|
||||||
import tempfile
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
import warnings
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from huggingface_hub import HfApi
|
from huggingface_hub import HfApi
|
||||||
@@ -1462,7 +1463,14 @@ class ModelTesterMixin:
|
|||||||
|
|
||||||
inputs["labels"] = inputs["labels"].to(problem_type["dtype"])
|
inputs["labels"] = inputs["labels"].to(problem_type["dtype"])
|
||||||
|
|
||||||
|
# This tests that we do not trigger the warning from PyTorch "Using a target size that is different
|
||||||
|
# to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
|
||||||
|
# they have the same size." which is a symptom that something is wrong for the regression problem.
|
||||||
|
# See https://github.com/huggingface/transformers/issues/11780
|
||||||
|
with warnings.catch_warnings(record=True) as warning_list:
|
||||||
loss = model(**inputs).loss
|
loss = model(**inputs).loss
|
||||||
|
self.assertListEqual(warning_list, [])
|
||||||
|
|
||||||
loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user