From 02300273e220932a449a47ebbe453e7789be454b Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Mon, 17 Jun 2024 18:56:51 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A8=20Remove=20dataset=20with=20restri?= =?UTF-8?q?ctive=20license=20(#31452)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit remove dataset with restrictive license --- src/transformers/utils/quantization_config.py | 13 +++++++++---- tests/quantization/gptq/test_gptq.py | 1 - 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py index fb2877643a..20d142b83f 100755 --- a/src/transformers/utils/quantization_config.py +++ b/src/transformers/utils/quantization_config.py @@ -543,7 +543,7 @@ class GPTQConfig(QuantizationConfigMixin): using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`. dataset (`Union[List[str]]`, *optional*): The dataset used for quantization. You can provide your own dataset in a list of string or just use the - original datasets used in GPTQ paper ['wikitext2','c4','c4-new','ptb','ptb-new'] + original datasets used in GPTQ paper ['wikitext2','c4','c4-new'] group_size (`int`, *optional*, defaults to 128): The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization. damp_percent (`float`, *optional*, defaults to 0.1): @@ -652,15 +652,20 @@ class GPTQConfig(QuantizationConfigMixin): raise ValueError("damp_percent must between 0 and 1.") if self.dataset is not None: if isinstance(self.dataset, str): - if self.dataset not in ["wikitext2", "c4", "c4-new", "ptb", "ptb-new"]: + if self.dataset in ["ptb", "ptb-new"]: + raise ValueError( + f"""{self.dataset} dataset was deprecated. You can only choose between + ['wikitext2','c4','c4-new']""" + ) + if self.dataset not in ["wikitext2", "c4", "c4-new"]: raise ValueError( f"""You have entered a string value for dataset. You can only choose between - ['wikitext2','c4','c4-new','ptb','ptb-new'], but we found {self.dataset}""" + ['wikitext2','c4','c4-new'], but we found {self.dataset}""" ) elif not isinstance(self.dataset, list): raise ValueError( f"""dataset needs to be either a list of string or a value in - ['wikitext2','c4','c4-new','ptb','ptb-new'], but we found {self.dataset}""" + ['wikitext2','c4','c4-new'], but we found {self.dataset}""" ) if self.disable_exllama is None and self.use_exllama is None: diff --git a/tests/quantization/gptq/test_gptq.py b/tests/quantization/gptq/test_gptq.py index 4d15c120d0..b1be9ac8c6 100644 --- a/tests/quantization/gptq/test_gptq.py +++ b/tests/quantization/gptq/test_gptq.py @@ -46,7 +46,6 @@ class GPTQConfigTest(unittest.TestCase): with self.assertRaises(ValueError): GPTQConfig(bits=2, dataset="auto_gpt") GPTQConfig(bits=2, dataset="c4") - GPTQConfig(bits=2, dataset="ptb-new") def test_damp_percent(self): with self.assertRaises(ValueError):