Add T5 GGUF loading support (#33389)

* add: GGUFT5Converter

* add: tensormapping for t5

* add: test code for t5

* fix: Remove whitespace from blank line

* add: t5 fp16 tests

* fix: whitespace formatting

* fix: minor formatting

* fix: testing every weights
This commit is contained in:
김준재
2024-10-24 22:10:59 +09:00
committed by GitHub
parent 30c76d5b28
commit dd267fca72
5 changed files with 197 additions and 7 deletions

View File

@@ -15,7 +15,7 @@
import tempfile
import unittest
from transformers import AddedToken, AutoModelForCausalLM, AutoTokenizer
from transformers import AddedToken, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
from transformers.testing_utils import (
require_gguf,
require_torch_gpu,
@@ -48,6 +48,8 @@ class GgufIntegrationTests(unittest.TestCase):
falcon7b_model_id = "xaviviro/falcon-7b-quantized-gguf"
falcon40b_model_id = "maddes8cht/tiiuae-falcon-40b-gguf"
original_flacon7b_model_id = "tiiuae/falcon-7b"
t5_model_id = "repetitio/flan-t5-small"
original_t5_model_id = "google/flan-t5-small"
stablelm_model_id = "afrideva/stablelm-3b-4e1t-GGUF"
stablelm2_model_id = "afrideva/stablelm-2-1_6b-GGUF"
original_stablelm2_model_id = "stabilityai/stablelm-2-1_6b"
@@ -92,6 +94,8 @@ class GgufIntegrationTests(unittest.TestCase):
q2_k_falcon7b_model_id = "falcon-7b-q2_k.gguf"
fp16_falcon7b_model_id = "falcon-7b-fp16.gguf"
q2_k_falcon40b_model_id = "tiiuae-falcon-40b-Q2_K.gguf"
fp16_t5_model_id = "flan-t5-small-f16.gguf"
q8_0_t5_model_id = "flan-t5-small-q8_0.gguf"
fp16_qwen2moe_model_id = "Qwen1.5-MoE-A2.7B.gguf"
fp16_gpt2_model_id = "gpt2.f16.gguf"
q8_gpt2_model_id = "gpt2.Q8_0.gguf"
@@ -487,6 +491,56 @@ class GgufIntegrationTests(unittest.TestCase):
self.assertTrue(quantized_param.shape == original_param.shape)
torch.testing.assert_close(quantized_param, original_param)
def test_t5_f16(self):
tokenizer = AutoTokenizer.from_pretrained(self.t5_model_id, gguf_file=self.fp16_t5_model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(
self.t5_model_id, gguf_file=self.fp16_t5_model_id, device_map="auto", torch_dtype=torch.float16
)
T5_EXAMPLE_TEXT = "translate English to German: How old are you?"
text = tokenizer(T5_EXAMPLE_TEXT, return_tensors="pt").to(torch_device)
out = model.generate(**text, max_new_tokens=10)
EXPECTED_TEXT = "Wie ich er?"
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
def test_t5_q8_0(self):
tokenizer = AutoTokenizer.from_pretrained(self.t5_model_id, gguf_file=self.q8_0_t5_model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(
self.t5_model_id, gguf_file=self.q8_0_t5_model_id, device_map="auto", torch_dtype=torch.float16
)
T5_EXAMPLE_TEXT = "translate English to German: How old are you?"
text = tokenizer(T5_EXAMPLE_TEXT, return_tensors="pt").to(torch_device)
out = model.generate(**text, max_new_tokens=10)
EXPECTED_TEXT = "Wie ich er?"
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
def test_t5_weights_conversion_fp16(self):
quantized_model = AutoModelForSeq2SeqLM.from_pretrained(
self.t5_model_id,
gguf_file=self.fp16_t5_model_id,
device_map="auto",
torch_dtype=torch.float16,
)
original_model = AutoModelForSeq2SeqLM.from_pretrained(
self.original_t5_model_id,
device_map="auto",
torch_dtype=torch.float16,
)
quantized_state_dict = quantized_model.state_dict()
original_state_dict = original_model.state_dict()
for (quantized_name, quantized_param), (original_name, original_param) in zip(
quantized_state_dict.items(), original_state_dict.items()
):
self.assertTrue(quantized_param.shape == original_param.shape)
torch.testing.assert_close(quantized_param, original_param, rtol=5e-04, atol=5e-04)
def test_gpt2_q8(self):
tokenizer = AutoTokenizer.from_pretrained(self.gpt2_model_id, gguf_file=self.q8_gpt2_model_id)
model = AutoModelForCausalLM.from_pretrained(