Tool types (#24032)
* Tool types * Tests + fixes * Isolate types * Oops * Review comments + docs * Tests + docs * soundfile -> vision
This commit is contained in:
121
tests/tools/test_agent_types.py
Normal file
121
tests/tools/test_agent_types.py
Normal file
@@ -0,0 +1,121 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2023 HuggingFace Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from transformers.testing_utils import get_tests_dir, require_soundfile, require_torch, require_vision
|
||||
from transformers.tools.agent_types import AgentAudio, AgentImage, AgentText
|
||||
from transformers.utils import is_soundfile_availble, is_torch_available, is_vision_available
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
if is_soundfile_availble():
|
||||
import soundfile as sf
|
||||
|
||||
if is_vision_available():
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def get_new_path(suffix="") -> str:
    """
    Build a unique file path located inside a freshly created temporary directory.

    Only the directory is created (through `tempfile.mkdtemp()`); no file is written at the returned location.

    Args:
        suffix (`str`, *optional*, defaults to `""`):
            Text appended verbatim to the random UUID4 file name, e.g. `".wav"`.

    Returns:
        `str`: The joined path `<temporary directory>/<uuid4><suffix>`.
    """
    parent_dir = tempfile.mkdtemp()
    file_name = "".join((str(uuid.uuid4()), suffix))
    return os.path.join(parent_dir, file_name)
|
||||
|
||||
|
||||
@require_soundfile
@require_torch
class AgentAudioTests(unittest.TestCase):
    """Round-trip checks for `AgentAudio` when built from a tensor and from a file path."""

    def test_from_tensor(self):
        """A tensor-backed `AgentAudio` returns the same samples and leaves a readable file on disk."""
        samples = torch.rand(12, dtype=torch.float64) - 0.5
        audio = AgentAudio(samples)
        saved_path = str(audio.to_string())

        # The raw value handed back by the wrapper must match the tensor it was built from
        self.assertTrue(torch.allclose(samples, audio.to_raw(), atol=1e-4))

        del audio

        # The file must still be there once the wrapper object is gone
        self.assertTrue(os.path.exists(saved_path))

        # What was written to disk must hold the same values as the original tensor
        samples_on_disk, _ = sf.read(saved_path)
        self.assertTrue(torch.allclose(samples, torch.tensor(samples_on_disk), atol=1e-4))

    def test_from_string(self):
        """A path-backed `AgentAudio` reads the samples stored in the file and reports that same path."""
        samples = torch.rand(12, dtype=torch.float64) - 0.5
        wav_path = get_new_path(suffix=".wav")
        sf.write(wav_path, samples, 16000)

        audio = AgentAudio(wav_path)

        self.assertTrue(torch.allclose(samples, audio.to_raw(), atol=1e-4))
        self.assertEqual(audio.to_string(), wav_path)
|
||||
|
||||
|
||||
@require_vision
@require_torch
class AgentImageTests(unittest.TestCase):
    """Checks for `AgentImage` when built from a tensor, from a file path and from a PIL image."""

    def test_from_tensor(self):
        """A tensor-backed `AgentImage` keeps its tensor, exposes a PIL image and leaves a file on disk."""
        tensor = torch.randint(0, 256, (64, 64, 3))
        agent_type = AgentImage(tensor)
        path = str(agent_type.to_string())

        # Ensure that the tensor and the agent_type's tensor are the same
        self.assertTrue(torch.allclose(tensor, agent_type._tensor, atol=1e-4))

        self.assertIsInstance(agent_type.to_raw(), Image.Image)

        # Ensure the path remains even after the object deletion
        del agent_type
        self.assertTrue(os.path.exists(path))

    def test_from_string(self):
        """A path-backed `AgentImage` points at the very same file and decodes to the same image."""
        path = Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png"
        image = Image.open(path)
        agent_type = AgentImage(path)

        self.assertTrue(path.samefile(agent_type.to_string()))
        self.assertTrue(image == agent_type.to_raw())

        # Ensure the path remains even after the object deletion
        del agent_type
        self.assertTrue(os.path.exists(path))

    def test_from_image(self):
        """A PIL-backed `AgentImage` serializes to a file distinct from the fixture, which outlives the object."""
        path = Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png"
        image = Image.open(path)
        agent_type = AgentImage(image)

        # Capture the serialized location *before* deleting the wrapper: the fixture path always
        # exists, so asserting on it would not verify anything about the file written by AgentImage.
        serialized_path = str(agent_type.to_string())

        self.assertFalse(path.samefile(serialized_path))
        self.assertTrue(image == agent_type.to_raw())

        # Ensure the serialized file remains even after the object deletion
        del agent_type
        self.assertTrue(os.path.exists(serialized_path))
|
||||
|
||||
|
||||
class AgentTextTests(unittest.TestCase):
    """Checks that `AgentText` compares equal to the plain string it was built from."""

    def test_from_string(self):
        """The string form, the raw form and the wrapper itself must all equal the source string."""
        text = "Hey!"
        wrapped = AgentText(text)

        as_string = wrapped.to_string()
        self.assertEqual(text, as_string)

        as_raw = wrapped.to_raw()
        self.assertEqual(text, as_raw)

        self.assertEqual(text, wrapped)
|
||||
@@ -30,28 +30,27 @@ class DocumentQuestionAnsweringToolTester(unittest.TestCase, ToolTesterMixin):
|
||||
|
||||
def test_exact_match_arg(self):
|
||||
dataset = load_dataset("hf-internal-testing/example-documents", split="test")
|
||||
image = dataset[0]["image"]
|
||||
document = dataset[0]["image"]
|
||||
|
||||
result = self.tool(image, "When is the coffee break?")
|
||||
result = self.tool(document, "When is the coffee break?")
|
||||
self.assertEqual(result, "11-14 to 11:39 a.m.")
|
||||
|
||||
def test_exact_match_arg_remote(self):
|
||||
dataset = load_dataset("hf-internal-testing/example-documents", split="test")
|
||||
image = dataset[0]["image"]
|
||||
document = dataset[0]["image"]
|
||||
|
||||
result = self.remote_tool(image, "When is the coffee break?")
|
||||
result = self.remote_tool(document, "When is the coffee break?")
|
||||
self.assertEqual(result, "11-14 to 11:39 a.m.")
|
||||
|
||||
def test_exact_match_kwarg(self):
|
||||
dataset = load_dataset("hf-internal-testing/example-documents", split="test")
|
||||
image = dataset[0]["image"]
|
||||
document = dataset[0]["image"]
|
||||
|
||||
result = self.tool(image=image, question="When is the coffee break?")
|
||||
self.assertEqual(result, "11-14 to 11:39 a.m.")
|
||||
self.tool(document=document, question="When is the coffee break?")
|
||||
|
||||
def test_exact_match_kwarg_remote(self):
|
||||
dataset = load_dataset("hf-internal-testing/example-documents", split="test")
|
||||
image = dataset[0]["image"]
|
||||
document = dataset[0]["image"]
|
||||
|
||||
result = self.remote_tool(image=image, question="When is the coffee break?")
|
||||
result = self.remote_tool(document=document, question="When is the coffee break?")
|
||||
self.assertEqual(result, "11-14 to 11:39 a.m.")
|
||||
|
||||
@@ -37,9 +37,11 @@ class TextToSpeechToolTester(unittest.TestCase, ToolTesterMixin):
|
||||
# SpeechT5 isn't deterministic
|
||||
torch.manual_seed(0)
|
||||
result = self.tool("hey")
|
||||
resulting_tensor = result.to_raw()
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
result[:3], torch.tensor([-0.00040140701457858086, -0.0002551682700868696, -0.00010294507956132293])
|
||||
resulting_tensor[:3],
|
||||
torch.tensor([-0.0005966668832115829, -0.0003657640190795064, -0.00013439502799883485]),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -47,8 +49,10 @@ class TextToSpeechToolTester(unittest.TestCase, ToolTesterMixin):
|
||||
# SpeechT5 isn't deterministic
|
||||
torch.manual_seed(0)
|
||||
result = self.tool("hey")
|
||||
resulting_tensor = result.to_raw()
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
result[:3], torch.tensor([-0.00040140701457858086, -0.0002551682700868696, -0.00010294507956132293])
|
||||
resulting_tensor[:3],
|
||||
torch.tensor([-0.0005966668832115829, -0.0003657640190795064, -0.00013439502799883485]),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -18,6 +18,7 @@ from typing import List
|
||||
|
||||
from transformers import is_torch_available, is_vision_available
|
||||
from transformers.testing_utils import get_tests_dir, is_tool_test
|
||||
from transformers.tools.agent_types import AGENT_TYPE_MAPPING, AgentAudio, AgentImage, AgentText
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -54,11 +55,11 @@ def output_types(outputs: List):
|
||||
output_types = []
|
||||
|
||||
for output in outputs:
|
||||
if isinstance(output, str):
|
||||
if isinstance(output, (str, AgentText)):
|
||||
output_types.append("text")
|
||||
elif isinstance(output, Image.Image):
|
||||
elif isinstance(output, (Image.Image, AgentImage)):
|
||||
output_types.append("image")
|
||||
elif isinstance(output, torch.Tensor):
|
||||
elif isinstance(output, (torch.Tensor, AgentAudio)):
|
||||
output_types.append("audio")
|
||||
else:
|
||||
raise ValueError(f"Invalid output: {output}")
|
||||
@@ -98,3 +99,35 @@ class ToolTesterMixin:
|
||||
self.assertTrue(hasattr(self.tool, "description"))
|
||||
self.assertTrue(hasattr(self.tool, "default_checkpoint"))
|
||||
self.assertTrue(self.tool.description.startswith("This is a tool that"))
|
||||
|
||||
def test_agent_types_outputs(self):
    """Check that every value returned by the tool is an instance of the agent type mapped to its declared output."""
    outputs = self.tool(*create_inputs(self.tool.inputs))

    # A tool may return a single value; normalize to a list so both cases are checked the same way
    if not isinstance(outputs, list):
        outputs = [outputs]

    declared_types = self.tool.outputs
    self.assertEqual(len(outputs), len(declared_types))

    for produced, declared in zip(outputs, declared_types):
        expected_cls = AGENT_TYPE_MAPPING[declared]
        self.assertTrue(isinstance(produced, expected_cls))
|
||||
|
||||
def test_agent_types_inputs(self):
    """
    Check that the tool accepts agent-typed inputs and still returns one value per declared output.

    Each raw input produced by `create_inputs` is wrapped in the agent type mapped to its declared input type, and the
    tool is then called with the wrapped values.
    """
    inputs = create_inputs(self.tool.inputs)

    _inputs = []

    for _input, input_type in zip(inputs, self.tool.inputs):
        if isinstance(input_type, list):
            # NOTE(review): for a list-typed input the same raw value is wrapped once per listed type and the
            # resulting list is passed as a single argument -- confirm this matches what such tools expect.
            _inputs.append([AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
        else:
            _inputs.append(AGENT_TYPE_MAPPING[input_type](_input))

    # Should not raise an error. The wrapped `_inputs` must be passed here: calling the tool with the raw
    # `inputs` would leave the agent-typed values built above completely unexercised.
    outputs = self.tool(*_inputs)

    if not isinstance(outputs, list):
        outputs = [outputs]

    self.assertEqual(len(outputs), len(self.tool.outputs))
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
import unittest
|
||||
|
||||
from transformers import load_tool
|
||||
from transformers.tools.agent_types import AGENT_TYPE_MAPPING
|
||||
|
||||
from .test_tools_common import ToolTesterMixin, output_types
|
||||
|
||||
@@ -51,3 +52,35 @@ class TranslationToolTester(unittest.TestCase, ToolTesterMixin):
|
||||
outputs = [outputs]
|
||||
|
||||
self.assertListEqual(output_types(outputs), self.tool.outputs)
|
||||
|
||||
def test_agent_types_outputs(self):
    """Check that each value returned for a fixed translation request is an instance of the mapped agent type."""
    request = ["Hey, what's up?", "English", "Spanish"]
    outputs = self.tool(*request)

    # A tool may return a single value; normalize to a list so both cases are checked the same way
    if not isinstance(outputs, list):
        outputs = [outputs]

    declared_types = self.tool.outputs
    self.assertEqual(len(outputs), len(declared_types))

    for produced, declared in zip(outputs, declared_types):
        expected_cls = AGENT_TYPE_MAPPING[declared]
        self.assertTrue(isinstance(produced, expected_cls))
|
||||
|
||||
def test_agent_types_inputs(self):
    """
    Check that the tool accepts agent-typed inputs and still returns one value per declared output.

    The three fixed string inputs are wrapped in the agent type mapped to the tool's declared input types, and the
    tool is then called with the wrapped values.
    """
    inputs = ["Hey, what's up?", "English", "Spanish"]

    _inputs = []

    for _input, input_type in zip(inputs, self.tool.inputs):
        if isinstance(input_type, list):
            # NOTE(review): for a list-typed input the same raw value is wrapped once per listed type and the
            # resulting list is passed as a single argument -- confirm this matches what such tools expect.
            _inputs.append([AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
        else:
            _inputs.append(AGENT_TYPE_MAPPING[input_type](_input))

    # Should not raise an error. The wrapped `_inputs` must be passed here: calling the tool with the raw
    # `inputs` would leave the agent-typed values built above completely unexercised.
    outputs = self.tool(*_inputs)

    if not isinstance(outputs, list):
        outputs = [outputs]

    self.assertEqual(len(outputs), len(self.tool.outputs))
|
||||
|
||||
Reference in New Issue
Block a user