Mistral-related models for QnA (#34045)

* mistral qna start

* mixtral qna

* oops

* qwen2 qna

* qwen2moe qna

* add missing input embed methods

* add `# Copied from` to all methods; can't copy directly from Llama due to the prefix

* make the `# Copied from` marker top-level
This commit is contained in:
Anton Vlasjuk
2024-10-14 08:53:32 +02:00
committed by GitHub
parent 37ea04013b
commit 7434c0ed21
19 changed files with 507 additions and 4 deletions

View File

@@ -47,6 +47,7 @@ if is_torch_available():
from transformers import (
MistralForCausalLM,
MistralForQuestionAnswering,
MistralForSequenceClassification,
MistralForTokenClassification,
MistralModel,
@@ -291,7 +292,13 @@ class MistralModelTester:
@require_torch
class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(MistralModel, MistralForCausalLM, MistralForSequenceClassification, MistralForTokenClassification)
(
MistralModel,
MistralForCausalLM,
MistralForSequenceClassification,
MistralForTokenClassification,
MistralForQuestionAnswering,
)
if is_torch_available()
else ()
)
@@ -303,6 +310,7 @@ class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
"token-classification": MistralForTokenClassification,
"text-generation": MistralForCausalLM,
"zero-shot": MistralForSequenceClassification,
"question-answering": MistralForQuestionAnswering,
}
if is_torch_available()
else {}

View File

@@ -41,6 +41,7 @@ if is_torch_available():
from transformers import (
MixtralForCausalLM,
MixtralForQuestionAnswering,
MixtralForSequenceClassification,
MixtralForTokenClassification,
MixtralModel,
@@ -291,7 +292,13 @@ class MixtralModelTester:
# Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Mixtral
class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(MixtralModel, MixtralForCausalLM, MixtralForSequenceClassification, MixtralForTokenClassification)
(
MixtralModel,
MixtralForCausalLM,
MixtralForSequenceClassification,
MixtralForTokenClassification,
MixtralForQuestionAnswering,
)
if is_torch_available()
else ()
)
@@ -303,6 +310,7 @@ class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
"token-classification": MixtralForTokenClassification,
"text-generation": MixtralForCausalLM,
"zero-shot": MixtralForSequenceClassification,
"question-answering": MixtralForQuestionAnswering,
}
if is_torch_available()
else {}

View File

@@ -43,6 +43,7 @@ if is_torch_available():
from transformers import (
Qwen2ForCausalLM,
Qwen2ForQuestionAnswering,
Qwen2ForSequenceClassification,
Qwen2ForTokenClassification,
Qwen2Model,
@@ -300,7 +301,13 @@ class Qwen2ModelTester:
# Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Qwen2
class Qwen2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(Qwen2Model, Qwen2ForCausalLM, Qwen2ForSequenceClassification, Qwen2ForTokenClassification)
(
Qwen2Model,
Qwen2ForCausalLM,
Qwen2ForSequenceClassification,
Qwen2ForTokenClassification,
Qwen2ForQuestionAnswering,
)
if is_torch_available()
else ()
)
@@ -312,6 +319,7 @@ class Qwen2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
"token-classification": Qwen2ForTokenClassification,
"text-generation": Qwen2ForCausalLM,
"zero-shot": Qwen2ForSequenceClassification,
"question-answering": Qwen2ForQuestionAnswering,
}
if is_torch_available()
else {}

View File

@@ -43,6 +43,7 @@ if is_torch_available():
from transformers import (
Qwen2MoeForCausalLM,
Qwen2MoeForQuestionAnswering,
Qwen2MoeForSequenceClassification,
Qwen2MoeForTokenClassification,
Qwen2MoeModel,
@@ -327,7 +328,13 @@ class Qwen2MoeModelTester:
# Copied from tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Qwen2Moe
class Qwen2MoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(Qwen2MoeModel, Qwen2MoeForCausalLM, Qwen2MoeForSequenceClassification, Qwen2MoeForTokenClassification)
(
Qwen2MoeModel,
Qwen2MoeForCausalLM,
Qwen2MoeForSequenceClassification,
Qwen2MoeForTokenClassification,
Qwen2MoeForQuestionAnswering,
)
if is_torch_available()
else ()
)
@@ -339,6 +346,7 @@ class Qwen2MoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
"token-classification": Qwen2MoeForTokenClassification,
"text-generation": Qwen2MoeForCausalLM,
"zero-shot": Qwen2MoeForSequenceClassification,
"question-answering": Qwen2MoeForQuestionAnswering,
}
if is_torch_available()
else {}