From 02300273e220932a449a47ebbe453e7789be454b Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Mon, 17 Jun 2024 18:56:51 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A8=20Remove=20dataset=20with=20restri?=
 =?UTF-8?q?ctive=20license=20(#31452)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

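Remove dataset with restrictive license

Drop 'ptb' and 'ptb-new' from the dataset names accepted by GPTQConfig.
Passing either one now raises a ValueError stating that the dataset was
deprecated; 'wikitext2', 'c4' and 'c4-new' remain valid. The docstring,
the validation error messages and the GPTQ config test are updated to match.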
---
 src/transformers/utils/quantization_config.py | 13 +++++++++----
 tests/quantization/gptq/test_gptq.py          |  1 -
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py
index fb2877643a..20d142b83f 100755
--- a/src/transformers/utils/quantization_config.py
+++ b/src/transformers/utils/quantization_config.py
@@ -543,7 +543,7 @@ class GPTQConfig(QuantizationConfigMixin):
                     using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
         dataset (`Union[List[str]]`, *optional*):
             The dataset used for quantization. You can provide your own dataset in a list of string or just use the
-            original datasets used in GPTQ paper ['wikitext2','c4','c4-new','ptb','ptb-new']
+            original datasets used in GPTQ paper ['wikitext2','c4','c4-new']
         group_size (`int`, *optional*, defaults to 128):
             The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization.
         damp_percent (`float`, *optional*, defaults to 0.1):
@@ -652,15 +652,20 @@ class GPTQConfig(QuantizationConfigMixin):
             raise ValueError("damp_percent must between 0 and 1.")
         if self.dataset is not None:
             if isinstance(self.dataset, str):
-                if self.dataset not in ["wikitext2", "c4", "c4-new", "ptb", "ptb-new"]:
+                if self.dataset in ["ptb", "ptb-new"]:
+                    raise ValueError(
+                        f"""{self.dataset} dataset was deprecated. You can only choose between
+                        ['wikitext2','c4','c4-new']"""
+                    )
+                if self.dataset not in ["wikitext2", "c4", "c4-new"]:
                     raise ValueError(
                         f"""You have entered a string value for dataset. You can only choose between
-                        ['wikitext2','c4','c4-new','ptb','ptb-new'], but we found {self.dataset}"""
+                        ['wikitext2','c4','c4-new'], but we found {self.dataset}"""
                     )
             elif not isinstance(self.dataset, list):
                 raise ValueError(
                     f"""dataset needs to be either a list of string or a value in
-                    ['wikitext2','c4','c4-new','ptb','ptb-new'], but we found {self.dataset}"""
+                    ['wikitext2','c4','c4-new'], but we found {self.dataset}"""
                 )
 
         if self.disable_exllama is None and self.use_exllama is None:
diff --git a/tests/quantization/gptq/test_gptq.py b/tests/quantization/gptq/test_gptq.py
index 4d15c120d0..b1be9ac8c6 100644
--- a/tests/quantization/gptq/test_gptq.py
+++ b/tests/quantization/gptq/test_gptq.py
@@ -46,7 +46,6 @@ class GPTQConfigTest(unittest.TestCase):
         with self.assertRaises(ValueError):
             GPTQConfig(bits=2, dataset="auto_gpt")
         GPTQConfig(bits=2, dataset="c4")
-        GPTQConfig(bits=2, dataset="ptb-new")
 
     def test_damp_percent(self):
         with self.assertRaises(ValueError):