Fix Blip-2 CI (#21595)

* use fp16 * use fp16 * use fp16 * use fp16 --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2023-02-13 16:44:27 +01:00
parent fd5320bb57
commit edc1e734bf
1 changed files with 10 additions and 6 deletions
--- a/tests/models/blip_2/test_modeling_blip_2.py
+++ b/tests/models/blip_2/test_modeling_blip_2.py
@@ -768,11 +768,13 @@ def prepare_img():
 class Blip2ModelIntegrationTest(unittest.TestCase):
    def test_inference_opt(self):
        processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
-        model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b").to(torch_device)
+        model = Blip2ForConditionalGeneration.from_pretrained(
+            "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
+        ).to(torch_device)

        # prepare image
        image = prepare_img()
-        inputs = processor(images=image, return_tensors="pt").to(torch_device)
+        inputs = processor(images=image, return_tensors="pt").to(torch_device, dtype=torch.float16)

        predictions = model.generate(**inputs)
        generated_text = processor.batch_decode(predictions, skip_special_tokens=True)[0].strip()
@@ -783,7 +785,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):

        # image and context
        prompt = "Question: which city is this? Answer:"
-        inputs = processor(images=image, text=prompt, return_tensors="pt").to(torch_device)
+        inputs = processor(images=image, text=prompt, return_tensors="pt").to(torch_device, dtype=torch.float16)

        predictions = model.generate(**inputs)
        generated_text = processor.batch_decode(predictions, skip_special_tokens=True)[0].strip()
@@ -797,11 +799,13 @@ class Blip2ModelIntegrationTest(unittest.TestCase):

    def test_inference_t5(self):
        processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
-        model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl").to(torch_device)
+        model = Blip2ForConditionalGeneration.from_pretrained(
+            "Salesforce/blip2-flan-t5-xl", torch_dtype=torch.float16
+        ).to(torch_device)

        # prepare image
        image = prepare_img()
-        inputs = processor(images=image, return_tensors="pt").to(torch_device)
+        inputs = processor(images=image, return_tensors="pt").to(torch_device, dtype=torch.float16)

        predictions = model.generate(**inputs)
        generated_text = processor.batch_decode(predictions, skip_special_tokens=True)[0].strip()
@@ -812,7 +816,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):

        # image and context
        prompt = "Question: which city is this? Answer:"
-        inputs = processor(images=image, text=prompt, return_tensors="pt").to(torch_device)
+        inputs = processor(images=image, text=prompt, return_tensors="pt").to(torch_device, dtype=torch.float16)

        predictions = model.generate(**inputs)
        generated_text = processor.batch_decode(predictions, skip_special_tokens=True)[0].strip()