[CI] post-GptOss fixes for green CI (#39929)

This commit is contained in:
Joao Gante
2025-08-05 19:04:59 +01:00
committed by GitHub
parent eb6e26acf3
commit b771e476a8
10 changed files with 21 additions and 16 deletions

View File

@@ -511,6 +511,8 @@
title: GPT2
- local: model_doc/gpt_bigcode
title: GPTBigCode
- local: model_doc/gpt_oss
title: GptOss
- local: model_doc/gptsan-japanese
title: GPTSAN Japanese
- local: model_doc/gpt-sw3
@@ -617,8 +619,6 @@
title: OLMoE
- local: model_doc/open-llama
title: Open-Llama
- local: model_doc/openai_moe
title: OpenAIMoe
- local: model_doc/opt
title: OPT
- local: model_doc/pegasus

View File

@@ -65,6 +65,10 @@ Learn how to quantize models in the [Quantization](../quantization) guide.
[[autodoc]] HqqConfig
## Mxfp4Config
[[autodoc]] Mxfp4Config
## FbgemmFp8Config
[[autodoc]] FbgemmFp8Config

View File

@@ -24,11 +24,11 @@ rendered properly in your Markdown viewer.
</div>
</div>
# OpenAIMoE
# GptOss
## Overview
The OpenAIMoE model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
The GptOss model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
<INSERT SHORT SUMMARY HERE>
The abstract from the paper is the following:
@@ -43,16 +43,16 @@ This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface
The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).
## OpenAIMoeConfig
## GptOssConfig
[[autodoc]] OpenAIMoeConfig
[[autodoc]] GptOssConfig
## OpenAIMoeModel
## GptOssModel
[[autodoc]] OpenAIMoeModel
[[autodoc]] GptOssModel
- forward
## OpenAIMoeForCausalLM
## GptOssForCausalLM
[[autodoc]] OpenAIMoeForCausalLM
[[autodoc]] GptOssForCausalLM
- forward

View File

@@ -40,7 +40,7 @@ if is_torch_flex_attn_available():
logger = logging.get_logger(__name__)
# Copied from transformers.models.jetmoe.modeling_jetmoe.load_balancing_loss_func
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,

View File

@@ -67,7 +67,7 @@ is_fast_path_available = all(
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func with gate->router
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func with gate->router
def load_balancing_loss_func(
router_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,

View File

@@ -50,7 +50,7 @@ if is_flash_attn_available():
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,

View File

@@ -39,7 +39,7 @@ if is_flash_attn_available():
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,

View File

@@ -55,7 +55,7 @@ _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_m
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,

View File

@@ -59,7 +59,6 @@ if is_torch_flex_attn_available():
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,

View File

@@ -345,6 +345,8 @@ SPECIAL_CASES_TO_ALLOW.update(
"IdeficsConfig": True,
"IdeficsVisionConfig": True,
"IdeficsPerceiverConfig": True,
# TODO (@Arthur / @Joao): `hidden_act` is unused; allow-listed here until that is resolved
"GptOssConfig": True,
}
)