Support loading Qwen3 MoE GGUF (#39638)
* Support loading Qwen3 MoE GGUF files
* Add Qwen3 MoE test cases
* Fix whitespace
* Fix GGML tests
This commit is contained in:
committed by
GitHub
parent
ccb2e0e03b
commit
fb141e2c90
@@ -102,6 +102,20 @@ GGUF_CONFIG_MAPPING = {
|
|||||||
"attention.layer_norm_rms_epsilon": "rms_norm_eps",
|
"attention.layer_norm_rms_epsilon": "rms_norm_eps",
|
||||||
"vocab_size": "vocab_size",
|
"vocab_size": "vocab_size",
|
||||||
},
|
},
|
||||||
|
"qwen3moe": {
|
||||||
|
"context_length": "max_position_embeddings",
|
||||||
|
"block_count": "num_hidden_layers",
|
||||||
|
"feed_forward_length": "intermediate_size",
|
||||||
|
"embedding_length": "hidden_size",
|
||||||
|
"rope.dimension_count": None,
|
||||||
|
"rope.freq_base": "rope_theta",
|
||||||
|
"attention.head_count": "num_attention_heads",
|
||||||
|
"attention.head_count_kv": "num_key_value_heads",
|
||||||
|
"attention.layer_norm_rms_epsilon": "rms_norm_eps",
|
||||||
|
"vocab_size": "vocab_size",
|
||||||
|
"expert_count": "num_experts",
|
||||||
|
"expert_used_count": "num_experts_per_tok",
|
||||||
|
},
|
||||||
"falcon": {
|
"falcon": {
|
||||||
"context_length": "max_position_embeddings",
|
"context_length": "max_position_embeddings",
|
||||||
"block_count": "num_hidden_layers",
|
"block_count": "num_hidden_layers",
|
||||||
@@ -689,6 +703,7 @@ GGUF_TO_FAST_CONVERTERS = {
|
|||||||
"qwen2": GGUFQwen2Converter,
|
"qwen2": GGUFQwen2Converter,
|
||||||
"qwen2_moe": GGUFQwen2Converter,
|
"qwen2_moe": GGUFQwen2Converter,
|
||||||
"qwen3": GGUFQwen2Converter,
|
"qwen3": GGUFQwen2Converter,
|
||||||
|
"qwen3_moe": GGUFQwen2Converter,
|
||||||
"phi3": GGUFPhi3Converter,
|
"phi3": GGUFPhi3Converter,
|
||||||
"bloom": GGUFGPTConverter,
|
"bloom": GGUFGPTConverter,
|
||||||
"falcon": GGUFGPTConverter,
|
"falcon": GGUFGPTConverter,
|
||||||
|
|||||||
@@ -302,6 +302,7 @@ class GgufModelTests(unittest.TestCase):
|
|||||||
gemma3_text_model_id = "unsloth/gemma-3-1b-it-GGUF"
|
gemma3_text_model_id = "unsloth/gemma-3-1b-it-GGUF"
|
||||||
gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF"
|
gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF"
|
||||||
qwen3_model_id = "Qwen/Qwen3-0.6B-GGUF"
|
qwen3_model_id = "Qwen/Qwen3-0.6B-GGUF"
|
||||||
|
qwen3moe_model_id = "Qwen/Qwen3-30B-A3B-GGUF"
|
||||||
|
|
||||||
q4_0_phi3_model_id = "Phi-3-mini-4k-instruct-q4.gguf"
|
q4_0_phi3_model_id = "Phi-3-mini-4k-instruct-q4.gguf"
|
||||||
q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
|
q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
|
||||||
@@ -335,6 +336,7 @@ class GgufModelTests(unittest.TestCase):
|
|||||||
bf16_gemma3_text_model_id = "gemma-3-1b-it-BF16.gguf"
|
bf16_gemma3_text_model_id = "gemma-3-1b-it-BF16.gguf"
|
||||||
bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf"
|
bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf"
|
||||||
q8_0_qwen3_model_id = "Qwen3-0.6B-Q8_0.gguf"
|
q8_0_qwen3_model_id = "Qwen3-0.6B-Q8_0.gguf"
|
||||||
|
q4_k_m_qwen3moe_model_id = "Qwen3-30B-A3B-Q4_K_M.gguf"
|
||||||
|
|
||||||
example_text = "Hello"
|
example_text = "Hello"
|
||||||
|
|
||||||
@@ -973,3 +975,17 @@ class GgufModelTests(unittest.TestCase):
|
|||||||
|
|
||||||
EXPECTED_TEXT = "HelloED\nI need to find the value of the"
|
EXPECTED_TEXT = "HelloED\nI need to find the value of the"
|
||||||
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
|
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
|
||||||
|
|
||||||
|
def test_qwen3moe_q4_k_m(self):
    """Load the Q4_K_M Qwen3 MoE GGUF file and compare a short generation to a fixed string."""
    repo_id = self.qwen3moe_model_id
    gguf_name = self.q4_k_m_qwen3moe_model_id

    # Tokenizer and model are both read from the same GGUF file in the repo.
    tokenizer = AutoTokenizer.from_pretrained(repo_id, gguf_file=gguf_name)
    model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=gguf_name, torch_dtype=torch.float16)

    # Encode the shared prompt and request at most ten new tokens.
    encoded = tokenizer(self.example_text, return_tensors="pt")
    generated = model.generate(**encoded, max_new_tokens=10)

    EXPECTED_TEXT = "Hello, I am a 20 year old male"
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    self.assertEqual(decoded, EXPECTED_TEXT)
|
||||||
|
|||||||
Reference in New Issue
Block a user