Add visit webpage tool (#33353)

* Add VisitWebpageTool
This commit is contained in:
Aymeric Roucher
2024-09-09 10:32:42 +02:00
committed by GitHub
parent 62aecd85ff
commit 489cbfd6d3
4 changed files with 51 additions and 3 deletions

View File

@@ -50,6 +50,10 @@ We provide two types of agents, based on the main [`Agent`] class:
[[autodoc]] ReactCodeAgent [[autodoc]] ReactCodeAgent
### ManagedAgent
[[autodoc]] ManagedAgent
## Tools ## Tools
### load_tool ### load_tool

View File

@@ -58,6 +58,7 @@ _import_structure = {
"Agent", "Agent",
"CodeAgent", "CodeAgent",
"HfApiEngine", "HfApiEngine",
"ManagedAgent",
"PipelineTool", "PipelineTool",
"ReactAgent", "ReactAgent",
"ReactCodeAgent", "ReactCodeAgent",
@@ -4826,6 +4827,7 @@ if TYPE_CHECKING:
Agent, Agent,
CodeAgent, CodeAgent,
HfApiEngine, HfApiEngine,
ManagedAgent,
PipelineTool, PipelineTool,
ReactAgent, ReactAgent,
ReactCodeAgent, ReactCodeAgent,

View File

@@ -39,7 +39,7 @@ else:
_import_structure["default_tools"] = ["FinalAnswerTool", "PythonInterpreterTool"] _import_structure["default_tools"] = ["FinalAnswerTool", "PythonInterpreterTool"]
_import_structure["document_question_answering"] = ["DocumentQuestionAnsweringTool"] _import_structure["document_question_answering"] = ["DocumentQuestionAnsweringTool"]
_import_structure["image_question_answering"] = ["ImageQuestionAnsweringTool"] _import_structure["image_question_answering"] = ["ImageQuestionAnsweringTool"]
_import_structure["search"] = ["DuckDuckGoSearchTool"] _import_structure["search"] = ["DuckDuckGoSearchTool", "VisitWebpageTool"]
_import_structure["speech_to_text"] = ["SpeechToTextTool"] _import_structure["speech_to_text"] = ["SpeechToTextTool"]
_import_structure["text_to_speech"] = ["TextToSpeechTool"] _import_structure["text_to_speech"] = ["TextToSpeechTool"]
_import_structure["translation"] = ["TranslationTool"] _import_structure["translation"] = ["TranslationTool"]
@@ -59,7 +59,7 @@ if TYPE_CHECKING:
from .default_tools import FinalAnswerTool, PythonInterpreterTool from .default_tools import FinalAnswerTool, PythonInterpreterTool
from .document_question_answering import DocumentQuestionAnsweringTool from .document_question_answering import DocumentQuestionAnsweringTool
from .image_question_answering import ImageQuestionAnsweringTool from .image_question_answering import ImageQuestionAnsweringTool
from .search import DuckDuckGoSearchTool from .search import DuckDuckGoSearchTool, VisitWebpageTool
from .speech_to_text import SpeechToTextTool from .speech_to_text import SpeechToTextTool
from .text_to_speech import TextToSpeechTool from .text_to_speech import TextToSpeechTool
from .translation import TranslationTool from .translation import TranslationTool

View File

@@ -14,6 +14,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import re
import requests
from requests.exceptions import RequestException
from .tools import Tool from .tools import Tool
@@ -29,7 +34,44 @@ class DuckDuckGoSearchTool(Tool):
from duckduckgo_search import DDGS from duckduckgo_search import DDGS
except ImportError: except ImportError:
raise ImportError( raise ImportError(
"You must install package `duckduckgo_search`: for instance run `pip install duckduckgo-search`." "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
) )
results = DDGS().text(query, max_results=7) results = DDGS().text(query, max_results=7)
return results return results
class VisitWebpageTool(Tool):
    """Tool that fetches a webpage over HTTP and returns its content as Markdown.

    Failures while fetching or converting the page are reported as a plain
    error string (rather than raised) so the caller always gets text back.
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given url and returns its content as a markdown string."
    inputs = {
        "url": {
            "type": "text",
            "description": "The url of the webpage to visit.",
        }
    }
    output_type = "text"

    def forward(self, url: str) -> str:
        """Download `url` and return the page content converted to Markdown.

        Args:
            url: The url of the webpage to visit.

        Returns:
            The Markdown rendering of the page with runs of three or more
            newlines collapsed to a single blank line, or a human-readable
            error message if the request or the conversion failed.

        Raises:
            ImportError: If the optional `markdownify` package is not installed.
        """
        # `markdownify` is an optional dependency, so it is imported lazily
        # and a missing install is reported with an actionable message.
        try:
            from markdownify import markdownify
        except ImportError:
            raise ImportError(
                "You must install package `markdownify` to run this tool: for instance run `pip install markdownify`."
            )
        try:
            # Send a GET request to the URL. Without an explicit timeout,
            # `requests` waits forever on an unresponsive server; a timeout
            # raises a `RequestException` subclass handled below.
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Remove multiple line breaks
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return markdown_content

        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            # Deliberate catch-all: any other failure is returned as text
            # instead of propagating to the caller.
            return f"An unexpected error occurred: {str(e)}"