Support QuestionAnswering Module for ModernBert-based models. (#35566)

* push ModernBertForQuestionAnswering
* update ModernBertForQuestionAnswering
* update __init__ loading
* set imports for ModernBertForQuestionAnswering
* update ModernBertForQuestionAnswering
* remove debugging logs
* update init_weights method
* remove custom initialization for ModernBertForQuestionAnswering
* apply make fix-copies
* apply make style
* apply make fix-copies
* append ModernBertForQuestionAnswering to the pipeline supported models
* remove unused file
* remove invalid autoload value
* update en/model_doc/modernbert.md
* apply make fixup command
* make fixup
* Update dummies
* update usage tips for ModernBertForQuestionAnswering
* update usage tips for ModernBertForQuestionAnswering
* add init
* add lint
* add consistency
* update init test
* change text to trigger stuck text
* use self.loss_function instead of custom loss
  By @Cyrilvallez
  Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
* Update modeling_modernbert.py
  make comparable commit to even it out
* Match whitespace
* whitespace
---------
Co-authored-by: Matt <rocketknight1@gmail.com>
Co-authored-by: Orion Weller <wellerorion@gmail.com>
Co-authored-by: Orion Weller <31665361+orionw@users.noreply.github.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-03-26 22:24:18 +02:00
parent 5b08db8844
commit 49b5ab6a27
8 changed files with 225 additions and 4 deletions
--- a/tests/models/modernbert/test_modeling_modernbert.py
+++ b/tests/models/modernbert/test_modeling_modernbert.py
@@ -40,6 +40,7 @@ if is_torch_available():
    from transformers import (
        MODEL_FOR_PRETRAINING_MAPPING,
        ModernBertForMaskedLM,
+        ModernBertForQuestionAnswering,
        ModernBertForSequenceClassification,
        ModernBertForTokenClassification,
        ModernBertModel,
@@ -224,6 +225,7 @@ class ModernBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
            ModernBertForMaskedLM,
            ModernBertForSequenceClassification,
            ModernBertForTokenClassification,
+            ModernBertForQuestionAnswering,
        )
        if is_torch_available()
        else ()
@@ -235,6 +237,7 @@ class ModernBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
            "text-classification": ModernBertForSequenceClassification,
            "token-classification": ModernBertForTokenClassification,
            "zero-shot": ModernBertForSequenceClassification,
+            "question-answering": ModernBertForQuestionAnswering,
        }
        if is_torch_available()
        else {}
@@ -289,7 +292,12 @@ class ModernBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
                # are initialized without `initializer_range`, so they're not set to ~0 via the _config_zero_init
                if param.requires_grad and not (
                    name == "classifier.weight"
-                    and model_class in [ModernBertForSequenceClassification, ModernBertForTokenClassification]
+                    and model_class
+                    in [
+                        ModernBertForSequenceClassification,
+                        ModernBertForTokenClassification,
+                        ModernBertForQuestionAnswering,
+                    ]
                ):
                    self.assertIn(
                        ((param.data.mean() * 1e9).round() / 1e9).item(),
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -3086,6 +3086,7 @@ class ModelTesterMixin:
                "ModernBertForSequenceClassification",
                "ModernBertForTokenClassification",
                "TimmWrapperForImageClassification",
+                "ModernBertForQuestionAnswering",
            ]
            special_param_names = [
                r"^bit\.",