Add token cost + runtime monitoring to Agent and HfEngine children (#34548)
* Add monitoring to Agent and HfEngine children
This commit is contained in:
@@ -17,7 +17,8 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
|
import time
|
||||||
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
from .. import is_torch_available
|
from .. import is_torch_available
|
||||||
from ..utils import logging as transformers_logging
|
from ..utils import logging as transformers_logging
|
||||||
@@ -25,6 +26,7 @@ from ..utils.import_utils import is_pygments_available
|
|||||||
from .agent_types import AgentAudio, AgentImage
|
from .agent_types import AgentAudio, AgentImage
|
||||||
from .default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool, setup_default_tools
|
from .default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool, setup_default_tools
|
||||||
from .llm_engine import HfApiEngine, MessageRole
|
from .llm_engine import HfApiEngine, MessageRole
|
||||||
|
from .monitoring import Monitor
|
||||||
from .prompts import (
|
from .prompts import (
|
||||||
DEFAULT_CODE_SYSTEM_PROMPT,
|
DEFAULT_CODE_SYSTEM_PROMPT,
|
||||||
DEFAULT_REACT_CODE_SYSTEM_PROMPT,
|
DEFAULT_REACT_CODE_SYSTEM_PROMPT,
|
||||||
@@ -353,17 +355,23 @@ class Agent:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
tools: Union[List[Tool], Toolbox],
|
tools: Union[List[Tool], Toolbox],
|
||||||
llm_engine: Callable = HfApiEngine(),
|
llm_engine: Callable = None,
|
||||||
system_prompt=DEFAULT_REACT_CODE_SYSTEM_PROMPT,
|
system_prompt: Optional[str] = None,
|
||||||
tool_description_template=None,
|
tool_description_template: Optional[str] = None,
|
||||||
additional_args={},
|
additional_args: Dict = {},
|
||||||
max_iterations: int = 6,
|
max_iterations: int = 6,
|
||||||
tool_parser=parse_json_tool_call,
|
tool_parser: Optional[Callable] = None,
|
||||||
add_base_tools: bool = False,
|
add_base_tools: bool = False,
|
||||||
verbose: int = 0,
|
verbose: int = 0,
|
||||||
grammar: Dict[str, str] = None,
|
grammar: Optional[Dict[str, str]] = None,
|
||||||
managed_agents: List = None,
|
managed_agents: Optional[List] = None,
|
||||||
|
step_callbacks: Optional[List[Callable]] = None,
|
||||||
|
monitor_metrics: bool = True,
|
||||||
):
|
):
|
||||||
|
if system_prompt is None:
|
||||||
|
system_prompt = DEFAULT_REACT_CODE_SYSTEM_PROMPT
|
||||||
|
if tool_parser is None:
|
||||||
|
tool_parser = parse_json_tool_call
|
||||||
self.agent_name = self.__class__.__name__
|
self.agent_name = self.__class__.__name__
|
||||||
self.llm_engine = llm_engine
|
self.llm_engine = llm_engine
|
||||||
self.system_prompt_template = system_prompt
|
self.system_prompt_template = system_prompt
|
||||||
@@ -406,6 +414,15 @@ class Agent:
|
|||||||
elif verbose == 2:
|
elif verbose == 2:
|
||||||
logger.setLevel(logging.DEBUG)
|
logger.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
# Initialize step callbacks
|
||||||
|
self.step_callbacks = step_callbacks if step_callbacks is not None else []
|
||||||
|
|
||||||
|
# Initialize Monitor if monitor_metrics is True
|
||||||
|
self.monitor = None
|
||||||
|
if monitor_metrics:
|
||||||
|
self.monitor = Monitor(self.llm_engine)
|
||||||
|
self.step_callbacks.append(self.monitor.update_metrics)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def toolbox(self) -> Toolbox:
|
def toolbox(self) -> Toolbox:
|
||||||
"""Get the toolbox currently available to the agent"""
|
"""Get the toolbox currently available to the agent"""
|
||||||
@@ -578,13 +595,19 @@ class CodeAgent(Agent):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
tools: List[Tool],
|
tools: List[Tool],
|
||||||
llm_engine: Callable = HfApiEngine(),
|
llm_engine: Optional[Callable] = None,
|
||||||
system_prompt: str = DEFAULT_CODE_SYSTEM_PROMPT,
|
system_prompt: Optional[str] = None,
|
||||||
tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
|
tool_description_template: Optional[str] = None,
|
||||||
grammar: Dict[str, str] = None,
|
grammar: Optional[Dict[str, str]] = None,
|
||||||
additional_authorized_imports: Optional[List[str]] = None,
|
additional_authorized_imports: Optional[List[str]] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
if llm_engine is None:
|
||||||
|
llm_engine = HfApiEngine()
|
||||||
|
if system_prompt is None:
|
||||||
|
system_prompt = DEFAULT_CODE_SYSTEM_PROMPT
|
||||||
|
if tool_description_template is None:
|
||||||
|
tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE
|
||||||
super().__init__(
|
super().__init__(
|
||||||
tools=tools,
|
tools=tools,
|
||||||
llm_engine=llm_engine,
|
llm_engine=llm_engine,
|
||||||
@@ -700,14 +723,23 @@ class ReactAgent(Agent):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
tools: List[Tool],
|
tools: List[Tool],
|
||||||
llm_engine: Callable = HfApiEngine(),
|
llm_engine: Optional[Callable] = None,
|
||||||
system_prompt: str = DEFAULT_REACT_CODE_SYSTEM_PROMPT,
|
system_prompt: Optional[str] = None,
|
||||||
tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
|
tool_description_template: Optional[str] = None,
|
||||||
grammar: Dict[str, str] = None,
|
grammar: Optional[Dict[str, str]] = None,
|
||||||
plan_type: Literal[tuple(SUPPORTED_PLAN_TYPES)] = SUPPORTED_PLAN_TYPES[0],
|
plan_type: Optional[str] = None,
|
||||||
planning_interval: Optional[int] = None,
|
planning_interval: Optional[int] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
if llm_engine is None:
|
||||||
|
llm_engine = HfApiEngine()
|
||||||
|
if system_prompt is None:
|
||||||
|
system_prompt = DEFAULT_REACT_CODE_SYSTEM_PROMPT
|
||||||
|
if tool_description_template is None:
|
||||||
|
tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE
|
||||||
|
if plan_type is None:
|
||||||
|
plan_type = SUPPORTED_PLAN_TYPES[0]
|
||||||
|
else:
|
||||||
assert plan_type in SUPPORTED_PLAN_TYPES, f"plan type {plan_type} is not supported"
|
assert plan_type in SUPPORTED_PLAN_TYPES, f"plan type {plan_type} is not supported"
|
||||||
super().__init__(
|
super().__init__(
|
||||||
tools=tools,
|
tools=tools,
|
||||||
@@ -776,16 +808,24 @@ class ReactAgent(Agent):
|
|||||||
final_answer = None
|
final_answer = None
|
||||||
iteration = 0
|
iteration = 0
|
||||||
while final_answer is None and iteration < self.max_iterations:
|
while final_answer is None and iteration < self.max_iterations:
|
||||||
|
step_start_time = time.time()
|
||||||
|
step_log_entry = {"iteration": iteration, "start_time": step_start_time}
|
||||||
try:
|
try:
|
||||||
step_logs = self.step()
|
self.step(step_log_entry)
|
||||||
if "final_answer" in step_logs:
|
if "final_answer" in step_log_entry:
|
||||||
final_answer = step_logs["final_answer"]
|
final_answer = step_log_entry["final_answer"]
|
||||||
except AgentError as e:
|
except AgentError as e:
|
||||||
self.logger.error(e, exc_info=1)
|
self.logger.error(e, exc_info=1)
|
||||||
self.logs[-1]["error"] = e
|
step_log_entry["error"] = e
|
||||||
finally:
|
finally:
|
||||||
|
step_end_time = time.time()
|
||||||
|
step_log_entry["step_end_time"] = step_end_time
|
||||||
|
step_log_entry["step_duration"] = step_end_time - step_start_time
|
||||||
|
self.logs.append(step_log_entry)
|
||||||
|
for callback in self.step_callbacks:
|
||||||
|
callback(step_log_entry)
|
||||||
iteration += 1
|
iteration += 1
|
||||||
yield self.logs[-1]
|
yield step_log_entry
|
||||||
|
|
||||||
if final_answer is None and iteration == self.max_iterations:
|
if final_answer is None and iteration == self.max_iterations:
|
||||||
error_message = "Reached max iterations."
|
error_message = "Reached max iterations."
|
||||||
@@ -794,6 +834,9 @@ class ReactAgent(Agent):
|
|||||||
self.logger.error(error_message, exc_info=1)
|
self.logger.error(error_message, exc_info=1)
|
||||||
final_answer = self.provide_final_answer(task)
|
final_answer = self.provide_final_answer(task)
|
||||||
final_step_log["final_answer"] = final_answer
|
final_step_log["final_answer"] = final_answer
|
||||||
|
final_step_log["step_duration"] = 0
|
||||||
|
for callback in self.step_callbacks:
|
||||||
|
callback(final_step_log)
|
||||||
yield final_step_log
|
yield final_step_log
|
||||||
|
|
||||||
yield final_answer
|
yield final_answer
|
||||||
@@ -805,16 +848,24 @@ class ReactAgent(Agent):
|
|||||||
final_answer = None
|
final_answer = None
|
||||||
iteration = 0
|
iteration = 0
|
||||||
while final_answer is None and iteration < self.max_iterations:
|
while final_answer is None and iteration < self.max_iterations:
|
||||||
|
step_start_time = time.time()
|
||||||
|
step_log_entry = {"iteration": iteration, "start_time": step_start_time}
|
||||||
try:
|
try:
|
||||||
if self.planning_interval is not None and iteration % self.planning_interval == 0:
|
if self.planning_interval is not None and iteration % self.planning_interval == 0:
|
||||||
self.planning_step(task, is_first_step=(iteration == 0), iteration=iteration)
|
self.planning_step(task, is_first_step=(iteration == 0), iteration=iteration)
|
||||||
step_logs = self.step()
|
self.step(step_log_entry)
|
||||||
if "final_answer" in step_logs:
|
if "final_answer" in step_log_entry:
|
||||||
final_answer = step_logs["final_answer"]
|
final_answer = step_log_entry["final_answer"]
|
||||||
except AgentError as e:
|
except AgentError as e:
|
||||||
self.logger.error(e, exc_info=1)
|
self.logger.error(e, exc_info=1)
|
||||||
self.logs[-1]["error"] = e
|
step_log_entry["error"] = e
|
||||||
finally:
|
finally:
|
||||||
|
step_end_time = time.time()
|
||||||
|
step_log_entry["step_end_time"] = step_end_time
|
||||||
|
step_log_entry["step_duration"] = step_end_time - step_start_time
|
||||||
|
self.logs.append(step_log_entry)
|
||||||
|
for callback in self.step_callbacks:
|
||||||
|
callback(step_log_entry)
|
||||||
iteration += 1
|
iteration += 1
|
||||||
|
|
||||||
if final_answer is None and iteration == self.max_iterations:
|
if final_answer is None and iteration == self.max_iterations:
|
||||||
@@ -824,6 +875,9 @@ class ReactAgent(Agent):
|
|||||||
self.logger.error(error_message, exc_info=1)
|
self.logger.error(error_message, exc_info=1)
|
||||||
final_answer = self.provide_final_answer(task)
|
final_answer = self.provide_final_answer(task)
|
||||||
final_step_log["final_answer"] = final_answer
|
final_step_log["final_answer"] = final_answer
|
||||||
|
final_step_log["step_duration"] = 0
|
||||||
|
for callback in self.step_callbacks:
|
||||||
|
callback(final_step_log)
|
||||||
|
|
||||||
return final_answer
|
return final_answer
|
||||||
|
|
||||||
@@ -937,13 +991,19 @@ class ReactJsonAgent(ReactAgent):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
tools: List[Tool],
|
tools: List[Tool],
|
||||||
llm_engine: Callable = HfApiEngine(),
|
llm_engine: Optional[Callable] = None,
|
||||||
system_prompt: str = DEFAULT_REACT_JSON_SYSTEM_PROMPT,
|
system_prompt: Optional[str] = None,
|
||||||
tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
|
tool_description_template: Optional[str] = None,
|
||||||
grammar: Dict[str, str] = None,
|
grammar: Optional[Dict[str, str]] = None,
|
||||||
planning_interval: Optional[int] = None,
|
planning_interval: Optional[int] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
if llm_engine is None:
|
||||||
|
llm_engine = HfApiEngine()
|
||||||
|
if system_prompt is None:
|
||||||
|
system_prompt = DEFAULT_REACT_JSON_SYSTEM_PROMPT
|
||||||
|
if tool_description_template is None:
|
||||||
|
tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE
|
||||||
super().__init__(
|
super().__init__(
|
||||||
tools=tools,
|
tools=tools,
|
||||||
llm_engine=llm_engine,
|
llm_engine=llm_engine,
|
||||||
@@ -954,7 +1014,7 @@ class ReactJsonAgent(ReactAgent):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def step(self):
|
def step(self, log_entry: Dict[str, Any]):
|
||||||
"""
|
"""
|
||||||
Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
|
Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
|
||||||
The errors are raised here, they are caught and logged in the run() method.
|
The errors are raised here, they are caught and logged in the run() method.
|
||||||
@@ -965,9 +1025,7 @@ class ReactJsonAgent(ReactAgent):
|
|||||||
self.logger.debug("===== New step =====")
|
self.logger.debug("===== New step =====")
|
||||||
|
|
||||||
# Add new step in logs
|
# Add new step in logs
|
||||||
current_step_logs = {}
|
log_entry["agent_memory"] = agent_memory.copy()
|
||||||
self.logs.append(current_step_logs)
|
|
||||||
current_step_logs["agent_memory"] = agent_memory.copy()
|
|
||||||
|
|
||||||
self.logger.info("===== Calling LLM with this last message: =====")
|
self.logger.info("===== Calling LLM with this last message: =====")
|
||||||
self.logger.info(self.prompt[-1])
|
self.logger.info(self.prompt[-1])
|
||||||
@@ -981,7 +1039,7 @@ class ReactJsonAgent(ReactAgent):
|
|||||||
raise AgentGenerationError(f"Error in generating llm output: {e}.")
|
raise AgentGenerationError(f"Error in generating llm output: {e}.")
|
||||||
self.logger.debug("===== Output message of the LLM: =====")
|
self.logger.debug("===== Output message of the LLM: =====")
|
||||||
self.logger.debug(llm_output)
|
self.logger.debug(llm_output)
|
||||||
current_step_logs["llm_output"] = llm_output
|
log_entry["llm_output"] = llm_output
|
||||||
|
|
||||||
# Parse
|
# Parse
|
||||||
self.logger.debug("===== Extracting action =====")
|
self.logger.debug("===== Extracting action =====")
|
||||||
@@ -992,8 +1050,8 @@ class ReactJsonAgent(ReactAgent):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise AgentParsingError(f"Could not parse the given action: {e}.")
|
raise AgentParsingError(f"Could not parse the given action: {e}.")
|
||||||
|
|
||||||
current_step_logs["rationale"] = rationale
|
log_entry["rationale"] = rationale
|
||||||
current_step_logs["tool_call"] = {"tool_name": tool_name, "tool_arguments": arguments}
|
log_entry["tool_call"] = {"tool_name": tool_name, "tool_arguments": arguments}
|
||||||
|
|
||||||
# Execute
|
# Execute
|
||||||
self.logger.warning("=== Agent thoughts:")
|
self.logger.warning("=== Agent thoughts:")
|
||||||
@@ -1011,8 +1069,8 @@ class ReactJsonAgent(ReactAgent):
|
|||||||
answer = arguments
|
answer = arguments
|
||||||
else:
|
else:
|
||||||
answer = arguments
|
answer = arguments
|
||||||
current_step_logs["final_answer"] = answer
|
log_entry["final_answer"] = answer
|
||||||
return current_step_logs
|
return answer
|
||||||
else:
|
else:
|
||||||
if arguments is None:
|
if arguments is None:
|
||||||
arguments = {}
|
arguments = {}
|
||||||
@@ -1030,8 +1088,8 @@ class ReactJsonAgent(ReactAgent):
|
|||||||
else:
|
else:
|
||||||
updated_information = str(observation).strip()
|
updated_information = str(observation).strip()
|
||||||
self.logger.info(updated_information)
|
self.logger.info(updated_information)
|
||||||
current_step_logs["observation"] = updated_information
|
log_entry["observation"] = updated_information
|
||||||
return current_step_logs
|
return log_entry
|
||||||
|
|
||||||
|
|
||||||
class ReactCodeAgent(ReactAgent):
|
class ReactCodeAgent(ReactAgent):
|
||||||
@@ -1044,14 +1102,20 @@ class ReactCodeAgent(ReactAgent):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
tools: List[Tool],
|
tools: List[Tool],
|
||||||
llm_engine: Callable = HfApiEngine(),
|
llm_engine: Optional[Callable] = None,
|
||||||
system_prompt: str = DEFAULT_REACT_CODE_SYSTEM_PROMPT,
|
system_prompt: Optional[str] = None,
|
||||||
tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
|
tool_description_template: Optional[str] = None,
|
||||||
grammar: Dict[str, str] = None,
|
grammar: Optional[Dict[str, str]] = None,
|
||||||
additional_authorized_imports: Optional[List[str]] = None,
|
additional_authorized_imports: Optional[List[str]] = None,
|
||||||
planning_interval: Optional[int] = None,
|
planning_interval: Optional[int] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
if llm_engine is None:
|
||||||
|
llm_engine = HfApiEngine()
|
||||||
|
if system_prompt is None:
|
||||||
|
system_prompt = DEFAULT_REACT_CODE_SYSTEM_PROMPT
|
||||||
|
if tool_description_template is None:
|
||||||
|
tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE
|
||||||
super().__init__(
|
super().__init__(
|
||||||
tools=tools,
|
tools=tools,
|
||||||
llm_engine=llm_engine,
|
llm_engine=llm_engine,
|
||||||
@@ -1075,7 +1139,7 @@ class ReactCodeAgent(ReactAgent):
|
|||||||
self.system_prompt = self.system_prompt.replace("<<authorized_imports>>", str(self.authorized_imports))
|
self.system_prompt = self.system_prompt.replace("<<authorized_imports>>", str(self.authorized_imports))
|
||||||
self.custom_tools = {}
|
self.custom_tools = {}
|
||||||
|
|
||||||
def step(self):
|
def step(self, log_entry: Dict[str, Any]):
|
||||||
"""
|
"""
|
||||||
Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
|
Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
|
||||||
The errors are raised here, they are caught and logged in the run() method.
|
The errors are raised here, they are caught and logged in the run() method.
|
||||||
@@ -1083,13 +1147,10 @@ class ReactCodeAgent(ReactAgent):
|
|||||||
agent_memory = self.write_inner_memory_from_logs()
|
agent_memory = self.write_inner_memory_from_logs()
|
||||||
|
|
||||||
self.prompt = agent_memory.copy()
|
self.prompt = agent_memory.copy()
|
||||||
|
|
||||||
self.logger.debug("===== New step =====")
|
self.logger.debug("===== New step =====")
|
||||||
|
|
||||||
# Add new step in logs
|
# Add new step in logs
|
||||||
current_step_logs = {}
|
log_entry["agent_memory"] = agent_memory.copy()
|
||||||
self.logs.append(current_step_logs)
|
|
||||||
current_step_logs["agent_memory"] = agent_memory.copy()
|
|
||||||
|
|
||||||
self.logger.info("===== Calling LLM with these last messages: =====")
|
self.logger.info("===== Calling LLM with these last messages: =====")
|
||||||
self.logger.info(self.prompt[-2:])
|
self.logger.info(self.prompt[-2:])
|
||||||
@@ -1104,7 +1165,7 @@ class ReactCodeAgent(ReactAgent):
|
|||||||
|
|
||||||
self.logger.debug("=== Output message of the LLM:")
|
self.logger.debug("=== Output message of the LLM:")
|
||||||
self.logger.debug(llm_output)
|
self.logger.debug(llm_output)
|
||||||
current_step_logs["llm_output"] = llm_output
|
log_entry["llm_output"] = llm_output
|
||||||
|
|
||||||
# Parse
|
# Parse
|
||||||
self.logger.debug("=== Extracting action ===")
|
self.logger.debug("=== Extracting action ===")
|
||||||
@@ -1120,8 +1181,8 @@ class ReactCodeAgent(ReactAgent):
|
|||||||
error_msg = f"Error in code parsing: {e}. Make sure to provide correct code"
|
error_msg = f"Error in code parsing: {e}. Make sure to provide correct code"
|
||||||
raise AgentParsingError(error_msg)
|
raise AgentParsingError(error_msg)
|
||||||
|
|
||||||
current_step_logs["rationale"] = rationale
|
log_entry["rationale"] = rationale
|
||||||
current_step_logs["tool_call"] = {"tool_name": "code interpreter", "tool_arguments": code_action}
|
log_entry["tool_call"] = {"tool_name": "code interpreter", "tool_arguments": code_action}
|
||||||
|
|
||||||
# Execute
|
# Execute
|
||||||
self.log_rationale_code_action(rationale, code_action)
|
self.log_rationale_code_action(rationale, code_action)
|
||||||
@@ -1146,7 +1207,7 @@ class ReactCodeAgent(ReactAgent):
|
|||||||
self.logger.warning("Last output from code snippet:")
|
self.logger.warning("Last output from code snippet:")
|
||||||
self.logger.log(32, str(result))
|
self.logger.log(32, str(result))
|
||||||
observation += "Last output from code snippet:\n" + str(result)[:100000]
|
observation += "Last output from code snippet:\n" + str(result)[:100000]
|
||||||
current_step_logs["observation"] = observation
|
log_entry["observation"] = observation
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"Code execution failed due to the following error:\n{str(e)}"
|
error_msg = f"Code execution failed due to the following error:\n{str(e)}"
|
||||||
if "'dict' object has no attribute 'read'" in str(e):
|
if "'dict' object has no attribute 'read'" in str(e):
|
||||||
@@ -1156,8 +1217,11 @@ class ReactCodeAgent(ReactAgent):
|
|||||||
if line[: len("final_answer")] == "final_answer":
|
if line[: len("final_answer")] == "final_answer":
|
||||||
self.logger.log(33, "Final answer:")
|
self.logger.log(33, "Final answer:")
|
||||||
self.logger.log(32, result)
|
self.logger.log(32, result)
|
||||||
current_step_logs["final_answer"] = result
|
log_entry["final_answer"] = result
|
||||||
return current_step_logs
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
LENGTH_TRUNCATE_REPORTS = 1000
|
||||||
|
|
||||||
|
|
||||||
class ManagedAgent:
|
class ManagedAgent:
|
||||||
@@ -1200,10 +1264,14 @@ And even if your task resolution is not successful, please return as much contex
|
|||||||
answer += f"\n\nFor more detail, find below a summary of this agent's work:\nSUMMARY OF WORK FROM AGENT '{self.name}':\n"
|
answer += f"\n\nFor more detail, find below a summary of this agent's work:\nSUMMARY OF WORK FROM AGENT '{self.name}':\n"
|
||||||
for message in self.agent.write_inner_memory_from_logs(summary_mode=True):
|
for message in self.agent.write_inner_memory_from_logs(summary_mode=True):
|
||||||
content = message["content"]
|
content = message["content"]
|
||||||
if len(str(content)) < 1000 or "[FACTS LIST]" in str(content):
|
if len(str(content)) < LENGTH_TRUNCATE_REPORTS or "[FACTS LIST]" in str(content):
|
||||||
answer += "\n" + str(content) + "\n---"
|
answer += "\n" + str(content) + "\n---"
|
||||||
else:
|
else:
|
||||||
answer += "\n" + str(content)[:1000] + "\n(...Step was truncated because too long)...\n---"
|
answer += (
|
||||||
|
"\n"
|
||||||
|
+ str(content)[:LENGTH_TRUNCATE_REPORTS]
|
||||||
|
+ "\n(...Step was truncated because too long)...\n---"
|
||||||
|
)
|
||||||
answer += f"\nEND OF SUMMARY OF WORK FROM AGENT '{self.name}'."
|
answer += f"\nEND OF SUMMARY OF WORK FROM AGENT '{self.name}'."
|
||||||
return answer
|
return answer
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -20,7 +20,12 @@ from typing import Dict, List, Optional
|
|||||||
|
|
||||||
from huggingface_hub import InferenceClient
|
from huggingface_hub import InferenceClient
|
||||||
|
|
||||||
|
from .. import AutoTokenizer
|
||||||
from ..pipelines.base import Pipeline
|
from ..pipelines.base import Pipeline
|
||||||
|
from ..utils import logging
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class MessageRole(str, Enum):
|
class MessageRole(str, Enum):
|
||||||
@@ -67,46 +72,32 @@ llama_role_conversions = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class HfApiEngine:
|
class HfEngine:
|
||||||
"""A class to interact with Hugging Face's Inference API for language model interaction.
|
def __init__(self, model_id: Optional[str] = None):
|
||||||
|
self.last_input_token_count = None
|
||||||
|
self.last_output_token_count = None
|
||||||
|
if model_id is None:
|
||||||
|
model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
|
||||||
|
logger.warning(f"Using default model for token counting: '{model_id}'")
|
||||||
|
try:
|
||||||
|
self.tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to load tokenizer for model {model_id}: {e}. Loading default tokenizer instead.")
|
||||||
|
self.tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
|
||||||
|
|
||||||
This engine allows you to communicate with Hugging Face's models using the Inference API. It can be used in both serverless mode or with a dedicated endpoint, supporting features like stop sequences and grammar customization.
|
def get_token_counts(self):
|
||||||
|
return {
|
||||||
|
"input_token_count": self.last_input_token_count,
|
||||||
|
"output_token_count": self.last_output_token_count,
|
||||||
|
}
|
||||||
|
|
||||||
Parameters:
|
def generate(
|
||||||
model (`str`, *optional*, defaults to `"meta-llama/Meta-Llama-3.1-8B-Instruct"`):
|
self, messages: List[Dict[str, str]], stop_sequences: Optional[List[str]] = None, grammar: Optional[str] = None
|
||||||
The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
|
|
||||||
token (`str`, *optional*):
|
|
||||||
The Hugging Face API token for authentication. If not provided, the class will use the token stored in the Hugging Face CLI configuration.
|
|
||||||
max_tokens (`int`, *optional*, defaults to 1500):
|
|
||||||
The maximum number of tokens allowed in the output.
|
|
||||||
timeout (`int`, *optional*, defaults to 120):
|
|
||||||
Timeout for the API request, in seconds.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError:
|
|
||||||
If the model name is not provided.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
||||||
token: Optional[str] = None,
|
|
||||||
max_tokens: Optional[int] = 1500,
|
|
||||||
timeout: Optional[int] = 120,
|
|
||||||
):
|
):
|
||||||
"""Initialize the HfApiEngine."""
|
raise NotImplementedError
|
||||||
if not model:
|
|
||||||
raise ValueError("Model name must be provided.")
|
|
||||||
|
|
||||||
self.model = model
|
|
||||||
self.client = InferenceClient(self.model, token=token, timeout=timeout)
|
|
||||||
self.max_tokens = max_tokens
|
|
||||||
|
|
||||||
def __call__(
|
def __call__(
|
||||||
self,
|
self, messages: List[Dict[str, str]], stop_sequences: Optional[List[str]] = None, grammar: Optional[str] = None
|
||||||
messages: List[Dict[str, str]],
|
|
||||||
stop_sequences: Optional[List[str]] = [],
|
|
||||||
grammar: Optional[str] = None,
|
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Process the input messages and return the model's response.
|
"""Process the input messages and return the model's response.
|
||||||
|
|
||||||
@@ -136,6 +127,57 @@ class HfApiEngine:
|
|||||||
"Quantum mechanics is the branch of physics that studies..."
|
"Quantum mechanics is the branch of physics that studies..."
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
if not isinstance(messages, List):
|
||||||
|
raise ValueError("Messages should be a list of dictionaries with 'role' and 'content' keys.")
|
||||||
|
if stop_sequences is None:
|
||||||
|
stop_sequences = []
|
||||||
|
response = self.generate(messages, stop_sequences, grammar)
|
||||||
|
self.last_input_token_count = len(self.tokenizer.apply_chat_template(messages, tokenize=True))
|
||||||
|
self.last_output_token_count = len(self.tokenizer.encode(response))
|
||||||
|
|
||||||
|
# Remove stop sequences from LLM output
|
||||||
|
for stop_seq in stop_sequences:
|
||||||
|
if response[-len(stop_seq) :] == stop_seq:
|
||||||
|
response = response[: -len(stop_seq)]
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
class HfApiEngine(HfEngine):
|
||||||
|
"""A class to interact with Hugging Face's Inference API for language model interaction.
|
||||||
|
|
||||||
|
This engine allows you to communicate with Hugging Face's models using the Inference API. It can be used in both serverless mode or with a dedicated endpoint, supporting features like stop sequences and grammar customization.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
model (`str`, *optional*, defaults to `"meta-llama/Meta-Llama-3.1-8B-Instruct"`):
|
||||||
|
The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
|
||||||
|
token (`str`, *optional*):
|
||||||
|
Token used by the Hugging Face API for authentication.
|
||||||
|
If not provided, the class will use the token stored in the Hugging Face CLI configuration.
|
||||||
|
max_tokens (`int`, *optional*, defaults to 1500):
|
||||||
|
The maximum number of tokens allowed in the output.
|
||||||
|
timeout (`int`, *optional*, defaults to 120):
|
||||||
|
Timeout for the API request, in seconds.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError:
|
||||||
|
If the model name is not provided.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
token: Optional[str] = None,
|
||||||
|
max_tokens: Optional[int] = 1500,
|
||||||
|
timeout: Optional[int] = 120,
|
||||||
|
):
|
||||||
|
super().__init__(model_id=model)
|
||||||
|
self.model = model
|
||||||
|
self.client = InferenceClient(self.model, token=token, timeout=timeout)
|
||||||
|
self.max_tokens = max_tokens
|
||||||
|
|
||||||
|
def generate(
|
||||||
|
self, messages: List[Dict[str, str]], stop_sequences: Optional[List[str]] = None, grammar: Optional[str] = None
|
||||||
|
) -> str:
|
||||||
# Get clean message list
|
# Get clean message list
|
||||||
messages = get_clean_message_list(messages, role_conversions=llama_role_conversions)
|
messages = get_clean_message_list(messages, role_conversions=llama_role_conversions)
|
||||||
|
|
||||||
@@ -148,41 +190,40 @@ class HfApiEngine:
|
|||||||
response = self.client.chat_completion(messages, stop=stop_sequences, max_tokens=self.max_tokens)
|
response = self.client.chat_completion(messages, stop=stop_sequences, max_tokens=self.max_tokens)
|
||||||
|
|
||||||
response = response.choices[0].message.content
|
response = response.choices[0].message.content
|
||||||
|
|
||||||
# Remove stop sequences from LLM output
|
|
||||||
for stop_seq in stop_sequences:
|
|
||||||
if response[-len(stop_seq) :] == stop_seq:
|
|
||||||
response = response[: -len(stop_seq)]
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
class TransformersEngine:
|
class TransformersEngine(HfEngine):
|
||||||
"""This engine uses a pre-initialized local text-generation pipeline."""
|
"""This engine uses a pre-initialized local text-generation pipeline."""
|
||||||
|
|
||||||
def __init__(self, pipeline: Pipeline):
|
def __init__(self, pipeline: Pipeline, model_id: Optional[str] = None):
|
||||||
|
super().__init__(model_id)
|
||||||
self.pipeline = pipeline
|
self.pipeline = pipeline
|
||||||
|
|
||||||
def __call__(
|
def generate(
|
||||||
self, messages: List[Dict[str, str]], stop_sequences: Optional[List[str]] = None, grammar: Optional[str] = None
|
self,
|
||||||
|
messages: List[Dict[str, str]],
|
||||||
|
stop_sequences: Optional[List[str]] = None,
|
||||||
|
grammar: Optional[str] = None,
|
||||||
|
max_length: int = 1500,
|
||||||
) -> str:
|
) -> str:
|
||||||
# Get clean message list
|
# Get clean message list
|
||||||
messages = get_clean_message_list(messages, role_conversions=llama_role_conversions)
|
messages = get_clean_message_list(messages, role_conversions=llama_role_conversions)
|
||||||
|
|
||||||
# Get LLM output
|
# Get LLM output
|
||||||
|
if stop_sequences is not None and len(stop_sequences) > 0:
|
||||||
|
stop_strings = stop_sequences
|
||||||
|
else:
|
||||||
|
stop_strings = None
|
||||||
|
|
||||||
output = self.pipeline(
|
output = self.pipeline(
|
||||||
messages,
|
messages,
|
||||||
stop_strings=stop_sequences,
|
stop_strings=stop_strings,
|
||||||
max_length=1500,
|
max_length=max_length,
|
||||||
tokenizer=self.pipeline.tokenizer,
|
tokenizer=self.pipeline.tokenizer,
|
||||||
)
|
)
|
||||||
|
|
||||||
response = output[0]["generated_text"][-1]["content"]
|
response = output[0]["generated_text"][-1]["content"]
|
||||||
|
|
||||||
# Remove stop sequences from LLM output
|
|
||||||
if stop_sequences is not None:
|
|
||||||
for stop_seq in stop_sequences:
|
|
||||||
if response[-len(stop_seq) :] == stop_seq:
|
|
||||||
response = response[: -len(stop_seq)]
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,11 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
from ..utils import logging
|
||||||
from .agent_types import AgentAudio, AgentImage, AgentText
|
from .agent_types import AgentAudio, AgentImage, AgentText
|
||||||
from .agents import ReactAgent
|
|
||||||
|
|
||||||
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def pull_message(step_log: dict, test_mode: bool = True):
|
def pull_message(step_log: dict, test_mode: bool = True):
|
||||||
@@ -54,7 +57,7 @@ def pull_message(step_log: dict, test_mode: bool = True):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def stream_to_gradio(agent: ReactAgent, task: str, test_mode: bool = False, **kwargs):
|
def stream_to_gradio(agent, task: str, test_mode: bool = False, **kwargs):
|
||||||
"""Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
|
"""Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -91,3 +94,24 @@ def stream_to_gradio(agent: ReactAgent, task: str, test_mode: bool = False, **kw
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
yield ChatMessage(role="assistant", content=str(final_answer))
|
yield ChatMessage(role="assistant", content=str(final_answer))
|
||||||
|
|
||||||
|
|
||||||
|
class Monitor:
|
||||||
|
def __init__(self, tracked_llm_engine):
|
||||||
|
self.step_durations = []
|
||||||
|
self.tracked_llm_engine = tracked_llm_engine
|
||||||
|
if getattr(self.tracked_llm_engine, "last_input_token_count", "Not found") != "Not found":
|
||||||
|
self.total_input_token_count = 0
|
||||||
|
self.total_output_token_count = 0
|
||||||
|
|
||||||
|
def update_metrics(self, step_log):
|
||||||
|
step_duration = step_log["step_duration"]
|
||||||
|
self.step_durations.append(step_duration)
|
||||||
|
logger.info(f"Step {len(self.step_durations)}:")
|
||||||
|
logger.info(f"- Time taken: {step_duration:.2f} seconds (valid only if step succeeded)")
|
||||||
|
|
||||||
|
if getattr(self.tracked_llm_engine, "last_input_token_count", None) is not None:
|
||||||
|
self.total_input_token_count += self.tracked_llm_engine.last_input_token_count
|
||||||
|
self.total_output_token_count += self.tracked_llm_engine.last_output_token_count
|
||||||
|
logger.info(f"- Input tokens: {self.total_input_token_count}")
|
||||||
|
logger.info(f"- Output tokens: {self.total_output_token_count}")
|
||||||
|
|||||||
@@ -785,21 +785,22 @@ def launch_gradio_demo(tool_class: Tool):
|
|||||||
def fn(*args, **kwargs):
|
def fn(*args, **kwargs):
|
||||||
return tool(*args, **kwargs)
|
return tool(*args, **kwargs)
|
||||||
|
|
||||||
|
TYPE_TO_COMPONENT_CLASS_MAPPING = {
|
||||||
|
"image": gr.Image,
|
||||||
|
"audio": gr.Audio,
|
||||||
|
"string": gr.Textbox,
|
||||||
|
"integer": gr.Textbox,
|
||||||
|
"number": gr.Textbox,
|
||||||
|
}
|
||||||
|
|
||||||
gradio_inputs = []
|
gradio_inputs = []
|
||||||
for input_name, input_details in tool_class.inputs.items():
|
for input_name, input_details in tool_class.inputs.items():
|
||||||
input_type = input_details["type"]
|
input_gradio_component_class = TYPE_TO_COMPONENT_CLASS_MAPPING[input_details["type"]]
|
||||||
if input_type == "image":
|
new_component = input_gradio_component_class(label=input_name)
|
||||||
gradio_inputs.append(gr.Image(label=input_name))
|
gradio_inputs.append(new_component)
|
||||||
elif input_type == "audio":
|
|
||||||
gradio_inputs.append(gr.Audio(label=input_name))
|
|
||||||
elif input_type in ["string", "integer", "number"]:
|
|
||||||
gradio_inputs.append(gr.Textbox(label=input_name))
|
|
||||||
else:
|
|
||||||
error_message = f"Input type '{input_type}' not supported."
|
|
||||||
raise ValueError(error_message)
|
|
||||||
|
|
||||||
gradio_output = tool_class.output_type
|
output_gradio_componentclass = TYPE_TO_COMPONENT_CLASS_MAPPING[tool_class.output_type]
|
||||||
assert gradio_output in ["string", "image", "audio"], f"Output type '{gradio_output}' not supported."
|
gradio_output = output_gradio_componentclass(label=input_name)
|
||||||
|
|
||||||
gr.Interface(
|
gr.Interface(
|
||||||
fn=fn,
|
fn=fn,
|
||||||
|
|||||||
@@ -21,11 +21,95 @@ from transformers.agents.monitoring import stream_to_gradio
|
|||||||
|
|
||||||
|
|
||||||
class MonitoringTester(unittest.TestCase):
|
class MonitoringTester(unittest.TestCase):
|
||||||
|
def test_code_agent_metrics(self):
|
||||||
|
class FakeLLMEngine:
|
||||||
|
def __init__(self):
|
||||||
|
self.last_input_token_count = 10
|
||||||
|
self.last_output_token_count = 20
|
||||||
|
|
||||||
|
def __call__(self, prompt, **kwargs):
|
||||||
|
return """
|
||||||
|
Code:
|
||||||
|
```py
|
||||||
|
final_answer('This is the final answer.')
|
||||||
|
```"""
|
||||||
|
|
||||||
|
agent = ReactCodeAgent(
|
||||||
|
tools=[],
|
||||||
|
llm_engine=FakeLLMEngine(),
|
||||||
|
max_iterations=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
agent.run("Fake task")
|
||||||
|
|
||||||
|
self.assertEqual(agent.monitor.total_input_token_count, 10)
|
||||||
|
self.assertEqual(agent.monitor.total_output_token_count, 20)
|
||||||
|
|
||||||
|
def test_json_agent_metrics(self):
|
||||||
|
class FakeLLMEngine:
|
||||||
|
def __init__(self):
|
||||||
|
self.last_input_token_count = 10
|
||||||
|
self.last_output_token_count = 20
|
||||||
|
|
||||||
|
def __call__(self, prompt, **kwargs):
|
||||||
|
return 'Action:{"action": "final_answer", "action_input": {"answer": "image"}}'
|
||||||
|
|
||||||
|
agent = ReactJsonAgent(
|
||||||
|
tools=[],
|
||||||
|
llm_engine=FakeLLMEngine(),
|
||||||
|
max_iterations=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
agent.run("Fake task")
|
||||||
|
|
||||||
|
self.assertEqual(agent.monitor.total_input_token_count, 10)
|
||||||
|
self.assertEqual(agent.monitor.total_output_token_count, 20)
|
||||||
|
|
||||||
|
def test_code_agent_metrics_max_iterations(self):
|
||||||
|
class FakeLLMEngine:
|
||||||
|
def __init__(self):
|
||||||
|
self.last_input_token_count = 10
|
||||||
|
self.last_output_token_count = 20
|
||||||
|
|
||||||
|
def __call__(self, prompt, **kwargs):
|
||||||
|
return "Malformed answer"
|
||||||
|
|
||||||
|
agent = ReactCodeAgent(
|
||||||
|
tools=[],
|
||||||
|
llm_engine=FakeLLMEngine(),
|
||||||
|
max_iterations=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
agent.run("Fake task")
|
||||||
|
|
||||||
|
self.assertEqual(agent.monitor.total_input_token_count, 20)
|
||||||
|
self.assertEqual(agent.monitor.total_output_token_count, 40)
|
||||||
|
|
||||||
|
def test_code_agent_metrics_generation_error(self):
|
||||||
|
class FakeLLMEngine:
|
||||||
|
def __init__(self):
|
||||||
|
self.last_input_token_count = 10
|
||||||
|
self.last_output_token_count = 20
|
||||||
|
|
||||||
|
def __call__(self, prompt, **kwargs):
|
||||||
|
raise AgentError
|
||||||
|
|
||||||
|
agent = ReactCodeAgent(
|
||||||
|
tools=[],
|
||||||
|
llm_engine=FakeLLMEngine(),
|
||||||
|
max_iterations=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
agent.run("Fake task")
|
||||||
|
|
||||||
|
self.assertEqual(agent.monitor.total_input_token_count, 20)
|
||||||
|
self.assertEqual(agent.monitor.total_output_token_count, 40)
|
||||||
|
|
||||||
def test_streaming_agent_text_output(self):
|
def test_streaming_agent_text_output(self):
|
||||||
def dummy_llm_engine(prompt, **kwargs):
|
def dummy_llm_engine(prompt, **kwargs):
|
||||||
return """
|
return """
|
||||||
Code:
|
Code:
|
||||||
````
|
```py
|
||||||
final_answer('This is the final answer.')
|
final_answer('This is the final answer.')
|
||||||
```"""
|
```"""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user