Mistral-related models for QnA (#34045)

* mistral qna start
* mixtral qna
* oops
* qwen2 qna
* qwen2moe qna
* add missing input embed methods
* add "# Copied from" to all methods; can't copy directly from llama due to the prefix
* make top-level "# Copied from"
2024-10-14 08:53:32 +02:00
parent 37ea04013b
commit 7434c0ed21
19 changed files with 507 additions and 4 deletions
--- a/tests/models/mistral/test_modeling_mistral.py
+++ b/tests/models/mistral/test_modeling_mistral.py
@@ -47,6 +47,7 @@ if is_torch_available():

    from transformers import (
        MistralForCausalLM,
+        MistralForQuestionAnswering,
        MistralForSequenceClassification,
        MistralForTokenClassification,
        MistralModel,
@@ -291,7 +292,13 @@ class MistralModelTester:
@require_torch
 class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
-        (MistralModel, MistralForCausalLM, MistralForSequenceClassification, MistralForTokenClassification)
+        (
+            MistralModel,
+            MistralForCausalLM,
+            MistralForSequenceClassification,
+            MistralForTokenClassification,
+            MistralForQuestionAnswering,
+        )
        if is_torch_available()
        else ()
    )
@@ -303,6 +310,7 @@ class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
            "token-classification": MistralForTokenClassification,
            "text-generation": MistralForCausalLM,
            "zero-shot": MistralForSequenceClassification,
+            "question-answering": MistralForQuestionAnswering,
        }
        if is_torch_available()
        else {}
--- a/tests/models/mixtral/test_modeling_mixtral.py
+++ b/tests/models/mixtral/test_modeling_mixtral.py
@@ -41,6 +41,7 @@ if is_torch_available():

    from transformers import (
        MixtralForCausalLM,
+        MixtralForQuestionAnswering,
        MixtralForSequenceClassification,
        MixtralForTokenClassification,
        MixtralModel,
@@ -291,7 +292,13 @@ class MixtralModelTester:
 # Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Mixtral
 class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
-        (MixtralModel, MixtralForCausalLM, MixtralForSequenceClassification, MixtralForTokenClassification)
+        (
+            MixtralModel,
+            MixtralForCausalLM,
+            MixtralForSequenceClassification,
+            MixtralForTokenClassification,
+            MixtralForQuestionAnswering,
+        )
        if is_torch_available()
        else ()
    )
@@ -303,6 +310,7 @@ class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
            "token-classification": MixtralForTokenClassification,
            "text-generation": MixtralForCausalLM,
            "zero-shot": MixtralForSequenceClassification,
+            "question-answering": MixtralForQuestionAnswering,
        }
        if is_torch_available()
        else {}
--- a/tests/models/qwen2/test_modeling_qwen2.py
+++ b/tests/models/qwen2/test_modeling_qwen2.py
@@ -43,6 +43,7 @@ if is_torch_available():

    from transformers import (
        Qwen2ForCausalLM,
+        Qwen2ForQuestionAnswering,
        Qwen2ForSequenceClassification,
        Qwen2ForTokenClassification,
        Qwen2Model,
@@ -300,7 +301,13 @@ class Qwen2ModelTester:
 # Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Qwen2
 class Qwen2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
-        (Qwen2Model, Qwen2ForCausalLM, Qwen2ForSequenceClassification, Qwen2ForTokenClassification)
+        (
+            Qwen2Model,
+            Qwen2ForCausalLM,
+            Qwen2ForSequenceClassification,
+            Qwen2ForTokenClassification,
+            Qwen2ForQuestionAnswering,
+        )
        if is_torch_available()
        else ()
    )
@@ -312,6 +319,7 @@ class Qwen2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
            "token-classification": Qwen2ForTokenClassification,
            "text-generation": Qwen2ForCausalLM,
            "zero-shot": Qwen2ForSequenceClassification,
+            "question-answering": Qwen2ForQuestionAnswering,
        }
        if is_torch_available()
        else {}
--- a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py
+++ b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py
@@ -43,6 +43,7 @@ if is_torch_available():

    from transformers import (
        Qwen2MoeForCausalLM,
+        Qwen2MoeForQuestionAnswering,
        Qwen2MoeForSequenceClassification,
        Qwen2MoeForTokenClassification,
        Qwen2MoeModel,
@@ -327,7 +328,13 @@ class Qwen2MoeModelTester:
 # Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Qwen2Moe
 class Qwen2MoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
-        (Qwen2MoeModel, Qwen2MoeForCausalLM, Qwen2MoeForSequenceClassification, Qwen2MoeForTokenClassification)
+        (
+            Qwen2MoeModel,
+            Qwen2MoeForCausalLM,
+            Qwen2MoeForSequenceClassification,
+            Qwen2MoeForTokenClassification,
+            Qwen2MoeForQuestionAnswering,
+        )
        if is_torch_available()
        else ()
    )
@@ -339,6 +346,7 @@ class Qwen2MoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
            "token-classification": Qwen2MoeForTokenClassification,
            "text-generation": Qwen2MoeForCausalLM,
            "zero-shot": Qwen2MoeForSequenceClassification,
+            "question-answering": Qwen2MoeForQuestionAnswering,
        }
        if is_torch_available()
        else {}