Tool types (#24032)

* Tool types * Tests + fixes * Isolate types * Oops * Review comments + docs * Tests + docs * soundfile -> vision
2023-06-09 13:34:07 -04:00
parent 061580c82c
commit deff5979fe
8 changed files with 521 additions and 15 deletions
--- a/tests/tools/test_agent_types.py
+++ b/tests/tools/test_agent_types.py
@@ -0,0 +1,121 @@
+# coding=utf-8
+# Copyright 2023 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import tempfile
+import unittest
+import uuid
+from pathlib import Path
+
+from transformers.testing_utils import get_tests_dir, require_soundfile, require_torch, require_vision
+from transformers.tools.agent_types import AgentAudio, AgentImage, AgentText
+from transformers.utils import is_soundfile_availble, is_torch_available, is_vision_available
+
+
+if is_torch_available():
+    import torch
+
+if is_soundfile_availble():
+    import soundfile as sf
+
+if is_vision_available():
+    from PIL import Image
+
+
+def get_new_path(suffix="") -> str:
+    """Return a unique file path (random UUID name plus `suffix`) inside a newly created temporary directory."""
+    return os.path.join(tempfile.mkdtemp(), str(uuid.uuid4()) + suffix)
+
+
+@require_soundfile
+@require_torch
+class AgentAudioTests(unittest.TestCase):
+    def test_from_tensor(self):
+        waveform = torch.rand(12, dtype=torch.float64) - 0.5
+        audio = AgentAudio(waveform)
+        saved_path = str(audio.to_string())
+
+        # The raw value exposed by the agent type must match the tensor it was built from
+        self.assertTrue(torch.allclose(waveform, audio.to_raw(), atol=1e-4))
+
+        del audio
+
+        # The serialized file must still exist once the agent type object is gone
+        self.assertTrue(os.path.exists(saved_path))
+
+        # The samples read back from that file must match the original tensor
+        samples, _ = sf.read(saved_path)
+        self.assertTrue(torch.allclose(waveform, torch.tensor(samples), atol=1e-4))
+
+    def test_from_string(self):
+        waveform = torch.rand(12, dtype=torch.float64) - 0.5
+        wav_path = get_new_path(suffix=".wav")
+        sf.write(wav_path, waveform, 16000)
+
+        audio = AgentAudio(wav_path)
+
+        self.assertTrue(torch.allclose(waveform, audio.to_raw(), atol=1e-4))
+        self.assertEqual(audio.to_string(), wav_path)
+
+
+@require_vision
+@require_torch
+class AgentImageTests(unittest.TestCase):
+    def test_from_tensor(self):
+        tensor = torch.randint(0, 256, (64, 64, 3))
+        agent_type = AgentImage(tensor)
+        path = str(agent_type.to_string())
+
+        # Ensure that the tensor and the agent_type's tensor are the same
+        self.assertTrue(torch.allclose(tensor, agent_type._tensor, atol=1e-4))
+        self.assertIsInstance(agent_type.to_raw(), Image.Image)
+
+        # Ensure the path remains even after the object deletion
+        del agent_type
+        self.assertTrue(os.path.exists(path))
+
+    def test_from_string(self):
+        path = Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png"
+        image = Image.open(path)
+        agent_type = AgentImage(path)
+
+        self.assertTrue(path.samefile(agent_type.to_string()))
+        self.assertTrue(image == agent_type.to_raw())
+
+        # Ensure the path remains even after the object deletion
+        del agent_type
+        self.assertTrue(os.path.exists(path))
+
+    def test_from_image(self):
+        path = Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png"
+        image = Image.open(path)
+        agent_type = AgentImage(image)
+        saved_path = str(agent_type.to_string())
+
+        # Built from an in-memory image, the agent type must report a file other than the fixture
+        self.assertFalse(path.samefile(saved_path))
+        self.assertTrue(image == agent_type.to_raw())
+        # Ensure the serialized file (not the fixture, which always exists) remains after the object deletion
+        del agent_type
+        self.assertTrue(os.path.exists(saved_path))
+
+
+class AgentTextTests(unittest.TestCase):
+    def test_from_string(self):
+        # For text, the string form, the raw form and the object itself must all equal the input
+        text = "Hey!"
+        wrapped = AgentText(text)
+        self.assertEqual(text, wrapped.to_string())
+        self.assertEqual(text, wrapped.to_raw())
+        self.assertEqual(text, wrapped)
--- a/tests/tools/test_document_question_answering.py
+++ b/tests/tools/test_document_question_answering.py
@@ -30,28 +30,29 @@ class DocumentQuestionAnsweringToolTester(unittest.TestCase, ToolTesterMixin):

    def test_exact_match_arg(self):
        dataset = load_dataset("hf-internal-testing/example-documents", split="test")
-        image = dataset[0]["image"]
+        document = dataset[0]["image"]

-        result = self.tool(image, "When is the coffee break?")
+        result = self.tool(document, "When is the coffee break?")
        self.assertEqual(result, "11-14 to 11:39 a.m.")

    def test_exact_match_arg_remote(self):
        dataset = load_dataset("hf-internal-testing/example-documents", split="test")
-        image = dataset[0]["image"]
+        document = dataset[0]["image"]

-        result = self.remote_tool(image, "When is the coffee break?")
+        result = self.remote_tool(document, "When is the coffee break?")
        self.assertEqual(result, "11-14 to 11:39 a.m.")

    def test_exact_match_kwarg(self):
        dataset = load_dataset("hf-internal-testing/example-documents", split="test")
-        image = dataset[0]["image"]
+        document = dataset[0]["image"]

-        result = self.tool(image=image, question="When is the coffee break?")
+        # Keep checking the answer: the keyword call must return the same result as the positional call
+        result = self.tool(document=document, question="When is the coffee break?")
        self.assertEqual(result, "11-14 to 11:39 a.m.")

    def test_exact_match_kwarg_remote(self):
        dataset = load_dataset("hf-internal-testing/example-documents", split="test")
-        image = dataset[0]["image"]
+        document = dataset[0]["image"]

-        result = self.remote_tool(image=image, question="When is the coffee break?")
+        result = self.remote_tool(document=document, question="When is the coffee break?")
        self.assertEqual(result, "11-14 to 11:39 a.m.")
--- a/tests/tools/test_text_to_speech.py
+++ b/tests/tools/test_text_to_speech.py
@@ -37,9 +37,11 @@ class TextToSpeechToolTester(unittest.TestCase, ToolTesterMixin):
        # SpeechT5 isn't deterministic
        torch.manual_seed(0)
        result = self.tool("hey")
+        resulting_tensor = result.to_raw()
        self.assertTrue(
            torch.allclose(
-                result[:3], torch.tensor([-0.00040140701457858086, -0.0002551682700868696, -0.00010294507956132293])
+                resulting_tensor[:3],
+                torch.tensor([-0.0005966668832115829, -0.0003657640190795064, -0.00013439502799883485]),
            )
        )

@@ -47,8 +49,10 @@ class TextToSpeechToolTester(unittest.TestCase, ToolTesterMixin):
        # SpeechT5 isn't deterministic
        torch.manual_seed(0)
        result = self.tool("hey")
+        resulting_tensor = result.to_raw()
        self.assertTrue(
            torch.allclose(
-                result[:3], torch.tensor([-0.00040140701457858086, -0.0002551682700868696, -0.00010294507956132293])
+                resulting_tensor[:3],
+                torch.tensor([-0.0005966668832115829, -0.0003657640190795064, -0.00013439502799883485]),
            )
        )
--- a/tests/tools/test_tools_common.py
+++ b/tests/tools/test_tools_common.py
@@ -18,6 +18,7 @@ from typing import List

 from transformers import is_torch_available, is_vision_available
 from transformers.testing_utils import get_tests_dir, is_tool_test
+from transformers.tools.agent_types import AGENT_TYPE_MAPPING, AgentAudio, AgentImage, AgentText


 if is_torch_available():
@@ -54,11 +55,11 @@ def output_types(outputs: List):
    output_types = []

    for output in outputs:
-        if isinstance(output, str):
+        if isinstance(output, (str, AgentText)):
            output_types.append("text")
-        elif isinstance(output, Image.Image):
+        elif isinstance(output, (Image.Image, AgentImage)):
            output_types.append("image")
-        elif isinstance(output, torch.Tensor):
+        elif isinstance(output, (torch.Tensor, AgentAudio)):
            output_types.append("audio")
        else:
            raise ValueError(f"Invalid output: {output}")
@@ -98,3 +99,35 @@ class ToolTesterMixin:
        self.assertTrue(hasattr(self.tool, "description"))
        self.assertTrue(hasattr(self.tool, "default_checkpoint"))
        self.assertTrue(self.tool.description.startswith("This is a tool that"))
+
+    def test_agent_types_outputs(self):
+        inputs = create_inputs(self.tool.inputs)
+        outputs = self.tool(*inputs)
+
+        if not isinstance(outputs, list):
+            outputs = [outputs]
+
+        self.assertEqual(len(outputs), len(self.tool.outputs))
+
+        # Each output must be an instance of the agent type mapped to its declared output type
+        for output, output_type in zip(outputs, self.tool.outputs):
+            self.assertIsInstance(output, AGENT_TYPE_MAPPING[output_type])
+
+    def test_agent_types_inputs(self):
+        inputs = create_inputs(self.tool.inputs)
+
+        # Wrap every raw input in the agent type(s) the tool declares for it
+        _inputs = []
+        for _input, input_type in zip(inputs, self.tool.inputs):
+            if isinstance(input_type, list):
+                _inputs.append([AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
+            else:
+                _inputs.append(AGENT_TYPE_MAPPING[input_type](_input))
+
+        # Should not raise an error; `_inputs` (not the raw `inputs`) is what exercises the agent types
+        # NOTE(review): list-typed inputs are forwarded as a list of wrapped values - confirm tools accept it
+        outputs = self.tool(*_inputs)
+        if not isinstance(outputs, list):
+            outputs = [outputs]
+
+        self.assertEqual(len(outputs), len(self.tool.outputs))
--- a/tests/tools/test_translation.py
+++ b/tests/tools/test_translation.py
@@ -16,6 +16,7 @@
 import unittest

 from transformers import load_tool
+from transformers.tools.agent_types import AGENT_TYPE_MAPPING

 from .test_tools_common import ToolTesterMixin, output_types

@@ -51,3 +52,35 @@ class TranslationToolTester(unittest.TestCase, ToolTesterMixin):
            outputs = [outputs]

        self.assertListEqual(output_types(outputs), self.tool.outputs)
+
+    def test_agent_types_outputs(self):
+        # Hand-written inputs: a sentence followed by two language names
+        inputs = ["Hey, what's up?", "English", "Spanish"]
+        outputs = self.tool(*inputs)
+
+        if not isinstance(outputs, list):
+            outputs = [outputs]
+
+        self.assertEqual(len(outputs), len(self.tool.outputs))
+
+        for output, output_type in zip(outputs, self.tool.outputs):
+            self.assertIsInstance(output, AGENT_TYPE_MAPPING[output_type])
+
+    def test_agent_types_inputs(self):
+        inputs = ["Hey, what's up?", "English", "Spanish"]
+
+        # Wrap every raw input in the agent type(s) the tool declares for it
+        _inputs = []
+        for _input, input_type in zip(inputs, self.tool.inputs):
+            if isinstance(input_type, list):
+                _inputs.append([AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
+            else:
+                _inputs.append(AGENT_TYPE_MAPPING[input_type](_input))
+
+        # Should not raise an error; `_inputs` (not the raw `inputs`) is what exercises the agent types
+        outputs = self.tool(*_inputs)
+
+        if not isinstance(outputs, list):
+            outputs = [outputs]
+
+        self.assertEqual(len(outputs), len(self.tool.outputs))