Tool types (#24032)

* Tool types

* Tests + fixes

* Isolate types

* Oops

* Review comments + docs

* Tests + docs

* soundfile -> vision
This commit is contained in:
Lysandre Debut
2023-06-09 13:34:07 -04:00
committed by GitHub
parent 061580c82c
commit deff5979fe
8 changed files with 521 additions and 15 deletions

View File

@@ -0,0 +1,121 @@
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest
import uuid
from pathlib import Path
from transformers.testing_utils import get_tests_dir, require_soundfile, require_torch, require_vision
from transformers.tools.agent_types import AgentAudio, AgentImage, AgentText
from transformers.utils import is_soundfile_availble, is_torch_available, is_vision_available
if is_torch_available():
import torch
if is_soundfile_availble():
import soundfile as sf
if is_vision_available():
from PIL import Image
def get_new_path(suffix="") -> str:
    """Return a unique file path located in a newly created temporary directory.

    The directory is created on disk via `tempfile.mkdtemp`; the file itself is
    not created, only its path is returned.

    Args:
        suffix: Text appended to the random file name (e.g. ".wav").

    Returns:
        The joined path `<temporary directory>/<uuid4><suffix>`.
    """
    parent_dir = tempfile.mkdtemp()
    file_name = str(uuid.uuid4()) + suffix
    return os.path.join(parent_dir, file_name)
@require_soundfile
@require_torch
class AgentAudioTests(unittest.TestCase):
    """Checks for `AgentAudio` when built from a tensor and when built from a file path."""

    def test_from_tensor(self):
        """An `AgentAudio` built from a tensor exposes that tensor and a file that outlives it."""
        samples = torch.rand(12, dtype=torch.float64) - 0.5
        audio = AgentAudio(samples)
        saved_path = str(audio.to_string())

        # The raw value handed back must match the samples the object was built from.
        raw_matches = torch.allclose(samples, audio.to_raw(), atol=1e-4)
        self.assertTrue(raw_matches)

        del audio

        # The file must still be there once the wrapper object is gone.
        self.assertTrue(os.path.exists(saved_path))

        # Reading the file back must give the original values (sample rate is ignored).
        decoded, _ = sf.read(saved_path)
        file_matches = torch.allclose(samples, torch.tensor(decoded), atol=1e-4)
        self.assertTrue(file_matches)

    def test_from_string(self):
        """An `AgentAudio` built from a path reads the written samples and reports that same path."""
        samples = torch.rand(12, dtype=torch.float64) - 0.5
        wav_path = get_new_path(suffix=".wav")
        sf.write(wav_path, samples, 16000)

        audio = AgentAudio(wav_path)

        raw_matches = torch.allclose(samples, audio.to_raw(), atol=1e-4)
        self.assertTrue(raw_matches)
        self.assertEqual(audio.to_string(), wav_path)
@require_vision
@require_torch
class AgentImageTests(unittest.TestCase):
    """Checks for `AgentImage` when built from a tensor, a file path, or a PIL image."""

    def test_from_tensor(self):
        """An `AgentImage` built from a tensor keeps the tensor, yields a PIL image, and leaves a file behind."""
        tensor = torch.randint(0, 256, (64, 64, 3))
        agent_type = AgentImage(tensor)
        path = str(agent_type.to_string())

        # Ensure that the tensor and the agent_type's tensor are the same
        self.assertTrue(torch.allclose(tensor, agent_type._tensor, atol=1e-4))

        self.assertIsInstance(agent_type.to_raw(), Image.Image)

        # Ensure the path remains even after the object deletion
        del agent_type
        self.assertTrue(os.path.exists(path))

    def test_from_string(self):
        """An `AgentImage` built from a path points at that same file and loads an equal image."""
        path = Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png"

        # Open the reference image in a context manager so its file handle is released.
        with Image.open(path) as image:
            agent_type = AgentImage(path)

            self.assertTrue(path.samefile(agent_type.to_string()))
            self.assertEqual(image, agent_type.to_raw())

        # Ensure the path remains even after the object deletion
        del agent_type
        self.assertTrue(os.path.exists(path))

    def test_from_image(self):
        """An `AgentImage` built from a PIL image serializes to a new file that outlives the object."""
        path = Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png"

        # Open the reference image in a context manager so its file handle is released.
        with Image.open(path) as image:
            agent_type = AgentImage(image)
            # Remember where the object serialized itself: this is the file whose
            # survival must be checked below, not the fixture the image came from.
            agent_path = str(agent_type.to_string())

            self.assertFalse(path.samefile(agent_path))
            self.assertEqual(image, agent_type.to_raw())

        # Ensure the serialized path remains even after the object deletion
        del agent_type
        self.assertTrue(os.path.exists(agent_path))
class AgentTextTests(unittest.TestCase):
    """Checks that `AgentText` behaves like the string it wraps."""

    def test_from_string(self):
        """Both accessors and the object itself must compare equal to the source string."""
        greeting = "Hey!"
        wrapped = AgentText(greeting)

        # String form, raw form, and the wrapper itself are all expected to equal the input.
        self.assertEqual(greeting, wrapped.to_string())
        self.assertEqual(greeting, wrapped.to_raw())
        self.assertEqual(greeting, wrapped)

View File

@@ -30,28 +30,27 @@ class DocumentQuestionAnsweringToolTester(unittest.TestCase, ToolTesterMixin):
def test_exact_match_arg(self):
    """Ask the tool about the first example document, passing both arguments positionally."""
    dataset = load_dataset("hf-internal-testing/example-documents", split="test")
    # The document is stored under the dataset's "image" column.
    document = dataset[0]["image"]

    # Single invocation: the earlier duplicate call through an `image` alias only
    # produced a result that was immediately overwritten.
    result = self.tool(document, "When is the coffee break?")
    self.assertEqual(result, "11-14 to 11:39 a.m.")
def test_exact_match_arg_remote(self):
    """Ask the remote tool about the first example document, passing both arguments positionally."""
    dataset = load_dataset("hf-internal-testing/example-documents", split="test")
    # The document is stored under the dataset's "image" column.
    document = dataset[0]["image"]

    # Single invocation: the earlier duplicate call through an `image` alias only
    # produced a result that was immediately overwritten.
    result = self.remote_tool(document, "When is the coffee break?")
    self.assertEqual(result, "11-14 to 11:39 a.m.")
def test_exact_match_kwarg(self):
    """Ask the tool about the first example document, passing both arguments by keyword."""
    dataset = load_dataset("hf-internal-testing/example-documents", split="test")
    # The document is stored under the dataset's "image" column.
    document = dataset[0]["image"]

    # Use the `document` keyword, as the remote keyword test does, and assert on
    # this call's result instead of on a separate call made with `image=`.
    # NOTE(review): confirm `document` is the tool's parameter name.
    result = self.tool(document=document, question="When is the coffee break?")
    self.assertEqual(result, "11-14 to 11:39 a.m.")
def test_exact_match_kwarg_remote(self):
    """Ask the remote tool about the first example document, passing both arguments by keyword."""
    dataset = load_dataset("hf-internal-testing/example-documents", split="test")
    # The document is stored under the dataset's "image" column.
    document = dataset[0]["image"]

    # Single invocation with the `document` keyword: the earlier duplicate call
    # with `image=` only produced a result that was immediately overwritten.
    result = self.remote_tool(document=document, question="When is the coffee break?")
    self.assertEqual(result, "11-14 to 11:39 a.m.")

View File

@@ -37,9 +37,11 @@ class TextToSpeechToolTester(unittest.TestCase, ToolTesterMixin):
# SpeechT5 isn't deterministic
torch.manual_seed(0)
result = self.tool("hey")
resulting_tensor = result.to_raw()
self.assertTrue(
torch.allclose(
result[:3], torch.tensor([-0.00040140701457858086, -0.0002551682700868696, -0.00010294507956132293])
resulting_tensor[:3],
torch.tensor([-0.0005966668832115829, -0.0003657640190795064, -0.00013439502799883485]),
)
)
@@ -47,8 +49,10 @@ class TextToSpeechToolTester(unittest.TestCase, ToolTesterMixin):
# SpeechT5 isn't deterministic
torch.manual_seed(0)
result = self.tool("hey")
resulting_tensor = result.to_raw()
self.assertTrue(
torch.allclose(
result[:3], torch.tensor([-0.00040140701457858086, -0.0002551682700868696, -0.00010294507956132293])
resulting_tensor[:3],
torch.tensor([-0.0005966668832115829, -0.0003657640190795064, -0.00013439502799883485]),
)
)

View File

@@ -18,6 +18,7 @@ from typing import List
from transformers import is_torch_available, is_vision_available
from transformers.testing_utils import get_tests_dir, is_tool_test
from transformers.tools.agent_types import AGENT_TYPE_MAPPING, AgentAudio, AgentImage, AgentText
if is_torch_available():
@@ -54,11 +55,11 @@ def output_types(outputs: List):
output_types = []
for output in outputs:
if isinstance(output, str):
if isinstance(output, (str, AgentText)):
output_types.append("text")
elif isinstance(output, Image.Image):
elif isinstance(output, (Image.Image, AgentImage)):
output_types.append("image")
elif isinstance(output, torch.Tensor):
elif isinstance(output, (torch.Tensor, AgentAudio)):
output_types.append("audio")
else:
raise ValueError(f"Invalid output: {output}")
@@ -98,3 +99,35 @@ class ToolTesterMixin:
self.assertTrue(hasattr(self.tool, "description"))
self.assertTrue(hasattr(self.tool, "default_checkpoint"))
self.assertTrue(self.tool.description.startswith("This is a tool that"))
def test_agent_types_outputs(self):
    """Each value returned by the tool must be an instance of the agent type declared for it."""
    tool_inputs = create_inputs(self.tool.inputs)
    results = self.tool(*tool_inputs)

    # A lone return value is wrapped so it can be paired with the declared outputs.
    if not isinstance(results, list):
        results = [results]

    self.assertEqual(len(results), len(self.tool.outputs))

    for result, declared_type in zip(results, self.tool.outputs):
        expected_cls = AGENT_TYPE_MAPPING[declared_type]
        self.assertTrue(isinstance(result, expected_cls))
def test_agent_types_inputs(self):
    """The tool must accept its inputs when they are wrapped in their agent types."""
    inputs = create_inputs(self.tool.inputs)

    # Wrap every generated input in the agent type declared for its position.
    _inputs = []
    for _input, input_type in zip(inputs, self.tool.inputs):
        if isinstance(input_type, list):
            _inputs.append([AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
        else:
            _inputs.append(AGENT_TYPE_MAPPING[input_type](_input))

    # Should not raise an error. Pass the wrapped `_inputs`: calling with the
    # plain `inputs` would leave the agent-typed values built above unused.
    # NOTE(review): confirm the tool unwraps agent-typed arguments on call.
    outputs = self.tool(*_inputs)

    if not isinstance(outputs, list):
        outputs = [outputs]

    self.assertEqual(len(outputs), len(self.tool.outputs))

View File

@@ -16,6 +16,7 @@
import unittest
from transformers import load_tool
from transformers.tools.agent_types import AGENT_TYPE_MAPPING
from .test_tools_common import ToolTesterMixin, output_types
@@ -51,3 +52,35 @@ class TranslationToolTester(unittest.TestCase, ToolTesterMixin):
outputs = [outputs]
self.assertListEqual(output_types(outputs), self.tool.outputs)
def test_agent_types_outputs(self):
    """Each value returned by the translation tool must be an instance of its declared agent type."""
    call_args = ["Hey, what's up?", "English", "Spanish"]
    results = self.tool(*call_args)

    # A lone return value is wrapped so it can be paired with the declared outputs.
    if not isinstance(results, list):
        results = [results]

    self.assertEqual(len(results), len(self.tool.outputs))

    for result, declared_type in zip(results, self.tool.outputs):
        expected_cls = AGENT_TYPE_MAPPING[declared_type]
        self.assertTrue(isinstance(result, expected_cls))
def test_agent_types_inputs(self):
    """The translation tool must accept its inputs when they are wrapped in their agent types."""
    inputs = ["Hey, what's up?", "English", "Spanish"]

    # Wrap every input in the agent type declared for its position.
    _inputs = []
    for _input, input_type in zip(inputs, self.tool.inputs):
        if isinstance(input_type, list):
            _inputs.append([AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
        else:
            _inputs.append(AGENT_TYPE_MAPPING[input_type](_input))

    # Should not raise an error. Pass the wrapped `_inputs`: calling with the
    # plain `inputs` would leave the agent-typed values built above unused.
    # NOTE(review): confirm the tool unwraps agent-typed arguments on call.
    outputs = self.tool(*_inputs)

    if not isinstance(outputs, list):
        outputs = [outputs]

    self.assertEqual(len(outputs), len(self.tool.outputs))