Support loading Qwen3 GGUF checkpoints (#38645)

* Support loading Qwen3 GGUF checkpoints

* Add qwen3 into GGUF_TO_FAST_CONVERTERS for tokenizer conversion

* Add test case

* Fix formatting
This commit is contained in:
44670
2025-07-15 17:53:41 +08:00
committed by GitHub
parent 0e4b7938d0
commit 2b79f14375
2 changed files with 31 additions and 0 deletions

View File

@@ -90,6 +90,18 @@ GGUF_CONFIG_MAPPING = {
"expert_count": "num_experts", "expert_count": "num_experts",
"expert_used_count": "num_experts_per_tok", "expert_used_count": "num_experts_per_tok",
}, },
"qwen3": {
"context_length": "max_position_embeddings",
"block_count": "num_hidden_layers",
"feed_forward_length": "intermediate_size",
"embedding_length": "hidden_size",
"rope.dimension_count": None,
"rope.freq_base": "rope_theta",
"attention.head_count": "num_attention_heads",
"attention.head_count_kv": "num_key_value_heads",
"attention.layer_norm_rms_epsilon": "rms_norm_eps",
"vocab_size": "vocab_size",
},
"falcon": { "falcon": {
"context_length": "max_position_embeddings", "context_length": "max_position_embeddings",
"block_count": "num_hidden_layers", "block_count": "num_hidden_layers",
@@ -676,6 +688,7 @@ GGUF_TO_FAST_CONVERTERS = {
"llama": GGUFLlamaConverter, "llama": GGUFLlamaConverter,
"qwen2": GGUFQwen2Converter, "qwen2": GGUFQwen2Converter,
"qwen2_moe": GGUFQwen2Converter, "qwen2_moe": GGUFQwen2Converter,
"qwen3": GGUFQwen2Converter,
"phi3": GGUFPhi3Converter, "phi3": GGUFPhi3Converter,
"bloom": GGUFGPTConverter, "bloom": GGUFGPTConverter,
"falcon": GGUFGPTConverter, "falcon": GGUFGPTConverter,

View File

@@ -301,6 +301,7 @@ class GgufModelTests(unittest.TestCase):
gemma3_qat_model_id = "google/gemma-3-1b-it-qat-q4_0-gguf" gemma3_qat_model_id = "google/gemma-3-1b-it-qat-q4_0-gguf"
gemma3_text_model_id = "unsloth/gemma-3-1b-it-GGUF" gemma3_text_model_id = "unsloth/gemma-3-1b-it-GGUF"
gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF" gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF"
qwen3_model_id = "Qwen/Qwen3-0.6B-GGUF"
q4_0_phi3_model_id = "Phi-3-mini-4k-instruct-q4.gguf" q4_0_phi3_model_id = "Phi-3-mini-4k-instruct-q4.gguf"
q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf" q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
@@ -333,6 +334,7 @@ class GgufModelTests(unittest.TestCase):
q4_0_gemma3_qat_model_id = "gemma-3-1b-it-q4_0.gguf" q4_0_gemma3_qat_model_id = "gemma-3-1b-it-q4_0.gguf"
bf16_gemma3_text_model_id = "gemma-3-1b-it-BF16.gguf" bf16_gemma3_text_model_id = "gemma-3-1b-it-BF16.gguf"
bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf" bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf"
q8_0_qwen3_model_id = "Qwen3-0.6B-Q8_0.gguf"
example_text = "Hello" example_text = "Hello"
@@ -955,3 +957,19 @@ class GgufModelTests(unittest.TestCase):
torch.testing.assert_close(original_params, converted_state_dict[layer_name]) torch.testing.assert_close(original_params, converted_state_dict[layer_name])
else: else:
raise ValueError(f"Layer {layer_name} is not presented in GGUF model") raise ValueError(f"Layer {layer_name} is not presented in GGUF model")
@require_read_token
@unittest.skipUnless(is_gguf_available("0.16.0"), "test requires gguf version >= 0.16.0")
def test_qwen3_q8_0(self):
    """Check that the Q8_0 Qwen3 GGUF checkpoint loads and generates the reference text.

    The tokenizer and the causal LM are both built from the same GGUF file,
    ten new tokens are generated from ``self.example_text``, and the decoded
    output (special tokens stripped) is compared with a fixed expected string.
    """
    repo_id = self.qwen3_model_id
    gguf_name = self.q8_0_qwen3_model_id

    # Tokenizer first, then the model, both sourced from the same GGUF file.
    tokenizer = AutoTokenizer.from_pretrained(repo_id, gguf_file=gguf_name)
    model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=gguf_name, torch_dtype=torch.float16)

    input_ids = tokenizer(self.example_text, return_tensors="pt")["input_ids"]
    generated = model.generate(input_ids, max_new_tokens=10)
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    EXPECTED_TEXT = "HelloED\nI need to find the value of the"
    self.assertEqual(decoded, EXPECTED_TEXT)