chore: fix typos in tests directory (#36785)
* chore: fix typos in tests directory * chore: fix typos in tests directory * chore: fix typos in tests directory * chore: fix typos in tests directory * chore: fix typos in tests directory * chore: fix typos in tests directory * chore: fix typos in tests directory
This commit is contained in:
@@ -13,7 +13,7 @@ The following is the recipe on how to effectively debug `bitsandbytes` integrati
|
||||
|
||||
The following instructions are tested with 2 NVIDIA-Tesla T4 GPUs. To run `bitsandbytes` successfully you need a GPU with 8-bit tensor core support. Note that Turing, Ampere or newer architectures - e.g. T4, RTX20s, RTX30s, A40-A100, A6000 - should be supported.
|
||||
|
||||
## Virutal envs
|
||||
## Virtual envs
|
||||
|
||||
```bash
|
||||
conda create --name int8-testing python==3.8
|
||||
@@ -61,7 +61,7 @@ This happens when some Linear weights are set to the CPU when using `accelerate`
|
||||
|
||||
Use the latest version of `accelerate` with a command such as: `pip install -U accelerate` and the problem should be solved.
|
||||
|
||||
### `Parameter has no attribue .CB`
|
||||
### `Parameter has no attribute .CB`
|
||||
|
||||
Same solution as above.
|
||||
|
||||
@@ -71,7 +71,7 @@ Run your script by pre-pending `CUDA_LAUNCH_BLOCKING=1` and you should observe a
|
||||
|
||||
### `CUDA illegal memory error: an illegal memory access at line...`:
|
||||
|
||||
Check the CUDA verisons with:
|
||||
Check the CUDA versions with:
|
||||
```bash
|
||||
nvcc --version
|
||||
```
|
||||
|
||||
@@ -179,7 +179,7 @@ class Bnb4BitTest(Base4bitTest):
|
||||
|
||||
def test_original_dtype(self):
|
||||
r"""
|
||||
A simple test to check if the model succesfully stores the original dtype
|
||||
A simple test to check if the model successfully stores the original dtype
|
||||
"""
|
||||
self.assertTrue(hasattr(self.model_4bit.config, "_pre_quantization_dtype"))
|
||||
self.assertFalse(hasattr(self.model_fp16.config, "_pre_quantization_dtype"))
|
||||
@@ -496,8 +496,8 @@ class Pipeline4BitTest(Base4bitTest):
|
||||
def test_pipeline(self):
|
||||
r"""
|
||||
The aim of this test is to verify that the mixed 4bit is compatible with `pipeline` from transformers. Since
|
||||
we used pipline for inference speed benchmarking we want to make sure that this feature does not break anything
|
||||
on pipline.
|
||||
we used pipeline for inference speed benchmarking we want to make sure that this feature does not break anything
|
||||
on pipeline.
|
||||
"""
|
||||
# self._clear_cuda_cache()
|
||||
self.pipe = pipeline(
|
||||
|
||||
@@ -213,7 +213,7 @@ class MixedInt8Test(BaseMixedInt8Test):
|
||||
|
||||
def test_original_dtype(self):
|
||||
r"""
|
||||
A simple test to check if the model succesfully stores the original dtype
|
||||
A simple test to check if the model successfully stores the original dtype
|
||||
"""
|
||||
self.assertTrue(hasattr(self.model_8bit.config, "_pre_quantization_dtype"))
|
||||
self.assertFalse(hasattr(self.model_fp16.config, "_pre_quantization_dtype"))
|
||||
@@ -655,8 +655,8 @@ class MixedInt8TestPipeline(BaseMixedInt8Test):
|
||||
def test_pipeline(self):
|
||||
r"""
|
||||
The aim of this test is to verify that the mixed int8 is compatible with `pipeline` from transformers. Since
|
||||
we used pipline for inference speed benchmarking we want to make sure that this feature does not break anything
|
||||
on pipline.
|
||||
we used pipeline for inference speed benchmarking we want to make sure that this feature does not break anything
|
||||
on pipeline.
|
||||
"""
|
||||
# self._clear_cuda_cache()
|
||||
self.pipe = pipeline(
|
||||
|
||||
@@ -167,7 +167,7 @@ class GPTQTest(unittest.TestCase):
|
||||
|
||||
def test_original_dtype(self):
|
||||
r"""
|
||||
A simple test to check if the model succesfully stores the original dtype
|
||||
A simple test to check if the model successfully stores the original dtype
|
||||
"""
|
||||
self.assertTrue(hasattr(self.quantized_model.config, "_pre_quantization_dtype"))
|
||||
self.assertFalse(hasattr(self.model_fp16.config, "_pre_quantization_dtype"))
|
||||
@@ -261,7 +261,7 @@ class GPTQTest(unittest.TestCase):
|
||||
if self.device_map == "cpu":
|
||||
quant_type = "ipex" if is_ipex_available() else "torch"
|
||||
else:
|
||||
# We expecte tritonv2 to be used here, because exllama backend doesn't support packing https://github.com/ModelCloud/GPTQModel/issues/1354
|
||||
# We expect tritonv2 to be used here, because exllama backend doesn't support packing https://github.com/ModelCloud/GPTQModel/issues/1354
|
||||
# TODO: Remove this once GPTQModel exllama kernels support packing
|
||||
quant_type = "tritonv2"
|
||||
quantized_model_from_saved = AutoModelForCausalLM.from_pretrained(
|
||||
@@ -433,7 +433,7 @@ class GPTQTestExllamaV2(unittest.TestCase):
|
||||
"exllamav2",
|
||||
)
|
||||
else:
|
||||
# We expecte tritonv2 to be used here, because exllama backend doesn't support packing https://github.com/ModelCloud/GPTQModel/issues/1354
|
||||
# We expect tritonv2 to be used here, because exllama backend doesn't support packing https://github.com/ModelCloud/GPTQModel/issues/1354
|
||||
# TODO: Remove this once GPTQModel exllama kernels support packing
|
||||
self.assertEqual(
|
||||
self.quantized_model.model.layers[0].self_attn.k_proj.QUANT_TYPE,
|
||||
@@ -458,7 +458,7 @@ class GPTQTestExllamaV2(unittest.TestCase):
|
||||
|
||||
def test_generate_quality(self):
|
||||
"""
|
||||
Simple test to check the quality of the model by comapring the the generated tokens with the expected tokens
|
||||
Simple test to check the quality of the model by comparing the generated tokens with the expected tokens
|
||||
"""
|
||||
self.check_inference_correctness(self.quantized_model)
|
||||
|
||||
|
||||
@@ -184,7 +184,7 @@ class HiggsTest(unittest.TestCase):
|
||||
output = model.generate(**input_ids, max_new_tokens=self.max_new_tokens)
|
||||
self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)
|
||||
|
||||
@unittest.skip("This will almost surely OOM. Enable when swithed to a smaller model")
|
||||
@unittest.skip("This will almost surely OOM. Enable when switched to a smaller model")
|
||||
def test_dequantize(self):
|
||||
"""
|
||||
Test the ability to dequantize a model
|
||||
|
||||
@@ -202,7 +202,7 @@ class TorchAoGPUTest(TorchAoTest):
|
||||
|
||||
def test_int4wo_offload(self):
|
||||
"""
|
||||
Simple test that checks if the quantized model int4 wieght only is working properly with cpu/disk offload
|
||||
Simple test that checks if the quantized model int4 weight only is working properly with cpu/disk offload
|
||||
"""
|
||||
|
||||
device_map_offload = {
|
||||
@@ -254,7 +254,7 @@ class TorchAoGPUTest(TorchAoTest):
|
||||
@require_torch_multi_gpu
|
||||
def test_int4wo_quant_multi_gpu(self):
|
||||
"""
|
||||
Simple test that checks if the quantized model int4 wieght only is working properly with multiple GPUs
|
||||
Simple test that checks if the quantized model int4 weight only is working properly with multiple GPUs
|
||||
Set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
|
||||
"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user