fix pixtral processor (#34486)

* fix pixtral processor
* test out full length batches + remove undue ValueError
* fix up processing
* fix tests
* fix
* last fixup
* style
* [run-slow] pixtral
* [run-slow] pixtral
* fix config key
* skip torchscript tests
* [run-slow] pixtral
* add missing key
* [run-slow] pixtral
* fix docs
* [run-slow] pixtral
* fix wrong url for integration test
* [run-slow] pixtral
* pixtralVisionModel does not have a lm head
* [run-slow] pixtral
2024-10-30 14:17:20 +01:00
parent 8a734ea2c3
commit 241d79026f
5 changed files with 35 additions and 48 deletions
--- a/tests/models/pixtral/test_modeling_pixtral.py
+++ b/tests/models/pixtral/test_modeling_pixtral.py
@@ -14,22 +14,16 @@
 # limitations under the License.
 """Testing suite for the PyTorch Pixtral model."""

-import gc
 import unittest

-import requests
-
 from transformers import (
-    AutoProcessor,
    PixtralVisionConfig,
    PixtralVisionModel,
    is_torch_available,
    is_vision_available,
 )
 from transformers.testing_utils import (
-    require_bitsandbytes,
    require_torch,
-    slow,
    torch_device,
 )

@@ -43,7 +37,7 @@ else:
    is_torch_greater_or_equal_than_2_0 = False

 if is_vision_available():
-    from PIL import Image
+    pass


 class PixtralVisionModelTester:
@@ -148,6 +142,7 @@ class PixtralVisionModelModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (PixtralVisionModel,) if is_torch_available() else ()
    test_pruning = False
    test_head_masking = False
+    test_torchscript = False

    def setUp(self):
        self.model_tester = PixtralVisionModelTester(self)
@@ -258,35 +253,3 @@ class PixtralVisionModelModelTest(ModelTesterMixin, unittest.TestCase):
    @unittest.skip(reason="Not supported yet")
    def test_determinism(self):
        pass
-
-
-@require_torch
-class PixtralVisionModelIntegrationTest(unittest.TestCase):
-    def setUp(self):
-        self.processor = AutoProcessor.from_pretrained("hf-internal-testing/pixtral-12b")
-
-    def tearDown(self):
-        gc.collect()
-        torch.cuda.empty_cache()
-
-    @slow
-    @require_bitsandbytes
-    def test_small_model_integration_test(self):
-        # Let' s make sure we test the preprocessing to replace what is used
-        model = PixtralVisionModel.from_pretrained("hf-internal-testing/pixtral-12b", load_in_4bit=True)
-
-        prompt = "<s>[INST][IMG]\nWhat are the things I should be cautious about when I visit this place?[/INST]"
-        image_file = "https://pixtral-vl.github.io/static/images/view.jpg"
-        raw_image = Image.open(requests.get(image_file, stream=True).raw)
-        inputs = self.processor(prompt, raw_image, return_tensors="pt")
-
-        EXPECTED_INPUT_IDS = torch.tensor([[1, 32000, 28705, 13, 11123, 28747, 1824, 460, 272, 1722,315, 1023, 347, 13831, 925, 684, 739, 315, 3251, 456,1633, 28804, 13, 4816, 8048, 12738, 28747]])  # fmt: skip
-        self.assertTrue(torch.equal(inputs["input_ids"], EXPECTED_INPUT_IDS))
-
-        output = model.generate(**inputs, max_new_tokens=20)
-        EXPECTED_DECODED_TEXT = "\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT: When visiting this place, there are a few things one should be cautious about. Firstly,"  # fmt: skip
-
-        self.assertEqual(
-            self.processor.decode(output[0], skip_special_tokens=True),
-            EXPECTED_DECODED_TEXT,
-        )