Support a QuestionAnswering module for ModernBert-based models. (#35566)

* push ModernBertForQuestionAnswering

* update ModernBertForQuestionAnswering

* update __init__ loading

* set imports for ModernBertForQuestionAnswering

* update ModernBertForQuestionAnswering

* remove debugging logs

* update init_weights method

* remove custom initialization for ModernBertForQuestionAnswering

* apply make fix-copies

* apply make style

* apply make fix-copies

* append ModernBertForQuestionAnswering to the pipeline supported models

* remove unused file

* remove invalid autoload value

* update en/model_doc/modernbert.md

* apply make fixup command

* make fixup

* Update dummies

* update usage tips for ModernBertForQuestionAnswering

* update usage tips for ModernBertForQuestionAnswering

* add init

* add lint

* add consistency

* update init test

* change text to trigger stuck test

* use self.loss_function instead of custom loss

By @Cyrilvallez

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>

* Update modeling_modernbert.py

make comparable commit to even it out

* Match whitespace

* whitespace

---------

Co-authored-by: Matt <rocketknight1@gmail.com>
Co-authored-by: Orion Weller <wellerorion@gmail.com>
Co-authored-by: Orion Weller <31665361+orionw@users.noreply.github.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
This commit is contained in:
Abu Bakr Soliman
2025-03-26 22:24:18 +02:00
committed by GitHub
parent 5b08db8844
commit 49b5ab6a27
8 changed files with 225 additions and 4 deletions

View File

@@ -40,6 +40,7 @@ if is_torch_available():
from transformers import (
MODEL_FOR_PRETRAINING_MAPPING,
ModernBertForMaskedLM,
ModernBertForQuestionAnswering,
ModernBertForSequenceClassification,
ModernBertForTokenClassification,
ModernBertModel,
@@ -224,6 +225,7 @@ class ModernBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
ModernBertForMaskedLM,
ModernBertForSequenceClassification,
ModernBertForTokenClassification,
ModernBertForQuestionAnswering,
)
if is_torch_available()
else ()
@@ -235,6 +237,7 @@ class ModernBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
"text-classification": ModernBertForSequenceClassification,
"token-classification": ModernBertForTokenClassification,
"zero-shot": ModernBertForSequenceClassification,
"question-answering": ModernBertForQuestionAnswering,
}
if is_torch_available()
else {}
@@ -289,7 +292,12 @@ class ModernBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
# are initialized without `initializer_range`, so they're not set to ~0 via the _config_zero_init
if param.requires_grad and not (
name == "classifier.weight"
and model_class in [ModernBertForSequenceClassification, ModernBertForTokenClassification]
and model_class
in [
ModernBertForSequenceClassification,
ModernBertForTokenClassification,
ModernBertForQuestionAnswering,
]
):
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),

View File

@@ -3086,6 +3086,7 @@ class ModelTesterMixin:
"ModernBertForSequenceClassification",
"ModernBertForTokenClassification",
"TimmWrapperForImageClassification",
"ModernBertForQuestionAnswering",
]
special_param_names = [
r"^bit\.",