From 28a22834bf609b27d0178b5ba99224f69577a3b6 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 18 Apr 2024 10:35:43 +0200
Subject: [PATCH] Fix all torch pipeline failures except one (#30290)

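* Update the pinned default revision of `google/vit-base-patch16-224` (pt and tf) to `3f49326` in `SUPPORTED_TASKS`

* Update pipeline test expectations: new expected Whisper long-form transcript; use `hf-internal-testing/tiny-random-layoutlmv2-for-dqa-test` in the small document-question-answering test; compare document-question-answering scores at 3 decimals instead of 4; pass `image_processor` instead of `feature_extractor` to the Donut pipeline; expect zero-shot audio classification scores of 1.0 / 0.0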

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 src/transformers/pipelines/__init__.py        |  4 +--
 ..._pipelines_automatic_speech_recognition.py |  2 +-
 ...t_pipelines_document_question_answering.py | 30 ++++++++++---------
 ...ipelines_zero_shot_audio_classification.py | 12 ++++----
 4 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index e60035dd57..94befaa851 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -367,8 +367,8 @@ SUPPORTED_TASKS = {
         "pt": (AutoModel,) if is_torch_available() else (),
         "default": {
             "model": {
-                "pt": ("google/vit-base-patch16-224", "29e7a1e183"),
-                "tf": ("google/vit-base-patch16-224", "29e7a1e183"),
+                "pt": ("google/vit-base-patch16-224", "3f49326"),
+                "tf": ("google/vit-base-patch16-224", "3f49326"),
             }
         },
         "type": "image",
diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
index 2e01ab2731..ddf9011808 100644
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -1216,7 +1216,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_whisper_longform(self):
         # fmt: off
-        EXPECTED_RESULT = """ Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting a classic Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a fisher's shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I. CHEERING AND APPLAUSE Sometimes I startle away, cubside down in the monkey bars of a condemned playground on a super fun site. Get all hept up on goofballs. Rummage that were discarded tag bag of defective toys. Yank out a fist bowl of disembodied doll limbs, toss them on a stained kid's place mat from a defunct dennies. set up a table inside a rusty cargo container down by the Wharf and challenged toothless drifters to the godless bughouse blitz of tournament that is my segment. Meanwhile."""
+        EXPECTED_RESULT = " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting a classic Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a fisher's shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I. CHEERING AND APPLAUSE Sometimes I startle away, cubside down in the monkey bars of a condemned playground on a super fun site. Get all hept up on goofballs. Rummage that were discarded tag bag of defective toys. Yank out a fist bowl of disembodied doll limbs, toss them on Saturday, Rusty Cargo, container down by the Wharf, and challenge toothless drifters to the godless bughouse lets of tournament that is my segment. MUSIC Meanwhile!"
         # fmt: on
 
         processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
diff --git a/tests/pipelines/test_pipelines_document_question_answering.py b/tests/pipelines/test_pipelines_document_question_answering.py
index 388be9247b..81febbc8c1 100644
--- a/tests/pipelines/test_pipelines_document_question_answering.py
+++ b/tests/pipelines/test_pipelines_document_question_answering.py
@@ -103,7 +103,9 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):
     @require_detectron2
     @require_pytesseract
     def test_small_model_pt(self):
-        dqa_pipeline = pipeline("document-question-answering", model="hf-internal-testing/tiny-random-layoutlmv2")
+        dqa_pipeline = pipeline(
+            "document-question-answering", model="hf-internal-testing/tiny-random-layoutlmv2-for-dqa-test"
+        )
         image = INVOICE_URL
         question = "How many cats are there?"
 
@@ -253,19 +255,19 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):
 
         outputs = dqa_pipeline(image=image, question=question, top_k=2)
         self.assertEqual(
-            nested_simplify(outputs, decimals=4),
+            nested_simplify(outputs, decimals=3),
             [
-                {"score": 0.4251, "answer": "us-001", "start": 16, "end": 16},
-                {"score": 0.0819, "answer": "1110212019", "start": 23, "end": 23},
+                {"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
+                {"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
             ],
         )
 
         outputs = dqa_pipeline({"image": image, "question": question}, top_k=2)
         self.assertEqual(
-            nested_simplify(outputs, decimals=4),
+            nested_simplify(outputs, decimals=3),
             [
-                {"score": 0.4251, "answer": "us-001", "start": 16, "end": 16},
-                {"score": 0.0819, "answer": "1110212019", "start": 23, "end": 23},
+                {"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
+                {"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
             ],
         )
 
@@ -273,11 +275,11 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):
             [{"image": image, "question": question}, {"image": image, "question": question}], top_k=2
         )
         self.assertEqual(
-            nested_simplify(outputs, decimals=4),
+            nested_simplify(outputs, decimals=3),
             [
                 [
-                    {"score": 0.4251, "answer": "us-001", "start": 16, "end": 16},
-                    {"score": 0.0819, "answer": "1110212019", "start": 23, "end": 23},
+                    {"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
+                    {"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
                 ]
             ]
             * 2,
@@ -288,10 +290,10 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):
         # This model should also work if `image` is set to None
         outputs = dqa_pipeline({"image": None, "word_boxes": word_boxes, "question": question}, top_k=2)
         self.assertEqual(
-            nested_simplify(outputs, decimals=4),
+            nested_simplify(outputs, decimals=3),
             [
-                {"score": 0.4251, "answer": "us-001", "start": 16, "end": 16},
-                {"score": 0.0819, "answer": "1110212019", "start": 23, "end": 23},
+                {"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
+                {"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
             ],
         )
 
@@ -355,7 +357,7 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):
             "document-question-answering",
             model="naver-clova-ix/donut-base-finetuned-docvqa",
             tokenizer=AutoTokenizer.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa"),
-            feature_extractor="naver-clova-ix/donut-base-finetuned-docvqa",
+            image_processor="naver-clova-ix/donut-base-finetuned-docvqa",
         )
 
         image = INVOICE_URL
diff --git a/tests/pipelines/test_pipelines_zero_shot_audio_classification.py b/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
index 80414f1722..09b2f56f98 100644
--- a/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
@@ -59,8 +59,8 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
         self.assertEqual(
             nested_simplify(output),
             [
-                {"score": 0.999, "label": "Sound of a dog"},
-                {"score": 0.001, "label": "Sound of vaccum cleaner"},
+                {"score": 1.0, "label": "Sound of a dog"},
+                {"score": 0.0, "label": "Sound of vaccum cleaner"},
             ],
         )
 
@@ -69,8 +69,8 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
             nested_simplify(output),
             [
                 [
-                    {"score": 0.999, "label": "Sound of a dog"},
-                    {"score": 0.001, "label": "Sound of vaccum cleaner"},
+                    {"score": 1.0, "label": "Sound of a dog"},
+                    {"score": 0.0, "label": "Sound of vaccum cleaner"},
                 ],
             ]
             * 5,
@@ -82,8 +82,8 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
             nested_simplify(output),
             [
                 [
-                    {"score": 0.999, "label": "Sound of a dog"},
-                    {"score": 0.001, "label": "Sound of vaccum cleaner"},
+                    {"score": 1.0, "label": "Sound of a dog"},
+                    {"score": 0.0, "label": "Sound of vaccum cleaner"},
                 ],
             ]
             * 5,