Add visit webpage tool (#33353)

* Add VisitWebpageTool
2024-09-09 10:32:42 +02:00
parent 62aecd85ff
commit 489cbfd6d3
4 changed files with 51 additions and 3 deletions
--- a/docs/source/en/main_classes/agent.md
+++ b/docs/source/en/main_classes/agent.md
@@ -50,6 +50,10 @@ We provide two types of agents, based on the main [`Agent`] class:
 [[autodoc]] ReactCodeAgent
 ### ManagedAgent
 [[autodoc]] ManagedAgent
 ## Tools
 ### load_tool
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -58,6 +58,7 @@ _import_structure = {
        "Agent",
        "CodeAgent",
        "HfApiEngine",
        "ManagedAgent",
        "PipelineTool",
        "ReactAgent",
        "ReactCodeAgent",
@@ -4826,6 +4827,7 @@ if TYPE_CHECKING:
        Agent,
        CodeAgent,
        HfApiEngine,
        ManagedAgent,
        PipelineTool,
        ReactAgent,
        ReactCodeAgent,
--- a/src/transformers/agents/__init__.py
+++ b/src/transformers/agents/__init__.py
@@ -39,7 +39,7 @@ else:
    _import_structure["default_tools"] = ["FinalAnswerTool", "PythonInterpreterTool"]
    _import_structure["document_question_answering"] = ["DocumentQuestionAnsweringTool"]
    _import_structure["image_question_answering"] = ["ImageQuestionAnsweringTool"]
-    _import_structure["search"] = ["DuckDuckGoSearchTool"]
+    _import_structure["search"] = ["DuckDuckGoSearchTool", "VisitWebpageTool"]
    _import_structure["speech_to_text"] = ["SpeechToTextTool"]
    _import_structure["text_to_speech"] = ["TextToSpeechTool"]
    _import_structure["translation"] = ["TranslationTool"]
@@ -59,7 +59,7 @@ if TYPE_CHECKING:
        from .default_tools import FinalAnswerTool, PythonInterpreterTool
        from .document_question_answering import DocumentQuestionAnsweringTool
        from .image_question_answering import ImageQuestionAnsweringTool
-        from .search import DuckDuckGoSearchTool
+        from .search import DuckDuckGoSearchTool, VisitWebpageTool
        from .speech_to_text import SpeechToTextTool
        from .text_to_speech import TextToSpeechTool
        from .translation import TranslationTool
--- a/src/transformers/agents/search.py
+++ b/src/transformers/agents/search.py
@@ -14,6 +14,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
 import requests
 from requests.exceptions import RequestException
 from .tools import Tool
@@ -29,7 +34,44 @@ class DuckDuckGoSearchTool(Tool):
            from duckduckgo_search import DDGS
        except ImportError:
            raise ImportError(
-                "You must install package `duckduckgo_search`: for instance run `pip install duckduckgo-search`."
+                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
            )
        results = DDGS().text(query, max_results=7)
        return results
 class VisitWebpageTool(Tool):
    """Tool that fetches a webpage over HTTP and returns its content as markdown.

    The page is downloaded with `requests`, converted from HTML to markdown with
    the optional `markdownify` package, and runs of three or more newlines are
    collapsed to a single blank line.
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given url and returns its content as a markdown string."
    inputs = {
        "url": {
            "type": "text",
            "description": "The url of the webpage to visit.",
        }
    }
    output_type = "text"

    def forward(self, url: str) -> str:
        """Download `url` and return its content converted to markdown.

        Args:
            url: The url of the webpage to visit.

        Returns:
            The markdown rendering of the page, or a human-readable error
            message string if the request or the conversion failed (failures
            after the import step are returned, not raised).

        Raises:
            ImportError: If the `markdownify` package is not installed.
        """
        try:
            from markdownify import markdownify
        except ImportError as e:
            raise ImportError(
                "You must install package `markdownify` to run this tool: for instance run `pip install markdownify`."
            ) from e

        try:
            # Always bound the request: without a timeout `requests` waits
            # forever on a server that accepts the connection but never answers.
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Remove multiple line breaks
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return markdown_content
        except RequestException as e:
            # Covers connection errors, timeouts and bad HTTP status codes.
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            # Best-effort: any other failure is reported as text as well.
            return f"An unexpected error occurred: {str(e)}"