diff --git a/docs/source/en/agents.md b/docs/source/en/agents.md
index 67c4b8a91b..992e75ebe5 100644
--- a/docs/source/en/agents.md
+++ b/docs/source/en/agents.md
@@ -126,12 +126,13 @@ Additionally, `llm_engine` can also take a `grammar` argument. In the case where
 
 You will also need a `tools` argument which accepts a list of `Tools` - it can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`.
 
-Now you can create an agent, like [`CodeAgent`], and run it. For convenience, we also provide the [`HfEngine`] class that uses `huggingface_hub.InferenceClient` under the hood.
+Now you can create an agent, like [`CodeAgent`], and run it. You can also create a [`TransformersEngine`] with a pre-initialized pipeline to run inference on your local machine using `transformers`.
+For convenience, since agentic behaviours generally require stronger models such as `Llama-3.1-70B-Instruct` that are harder to run locally for now, we also provide the [`HfApiEngine`] class that initializes a `huggingface_hub.InferenceClient` under the hood. 
 
 ```python
-from transformers import CodeAgent, HfEngine
+from transformers import CodeAgent, HfApiEngine
 
-llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
+llm_engine = HfApiEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
 agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
 
 agent.run(
@@ -141,7 +142,7 @@ agent.run(
 ```
 
 This will be handy in case of emergency baguette need!
-You can even leave the argument `llm_engine` undefined, and an [`HfEngine`] will be created by default.
+You can even leave the argument `llm_engine` undefined, and an [`HfApiEngine`] will be created by default.
 
 ```python
 from transformers import CodeAgent
@@ -521,14 +522,14 @@ import gradio as gr
 from transformers import (
     load_tool,
     ReactCodeAgent,
-    HfEngine,
+    HfApiEngine,
     stream_to_gradio,
 )
 
 # Import tool from Hub
 image_generation_tool = load_tool("m-ric/text-to-image")
 
-llm_engine = HfEngine("meta-llama/Meta-Llama-3-70B-Instruct")
+llm_engine = HfApiEngine("meta-llama/Meta-Llama-3-70B-Instruct")
 
 # Initialize the agent with the image generation tool
 agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
diff --git a/docs/source/en/main_classes/agent.md b/docs/source/en/main_classes/agent.md
index 444003615b..a9fb944c62 100644
--- a/docs/source/en/main_classes/agent.md
+++ b/docs/source/en/main_classes/agent.md
@@ -87,12 +87,33 @@ These engines have the following specification:
 1. Follow the [messages format](../chat_templating.md) for its input (`List[Dict[str, str]]`) and return a string.
 2. Stop generating outputs *before* the sequences passed in the argument `stop_sequences`
 
-### HfEngine
+### TransformersEngine
 
-For convenience, we have added a `HfEngine` that implements the points above and uses an inference endpoint for the execution of the LLM.
+For convenience, we have added a `TransformersEngine` that implements the points above, taking a pre-initialized `Pipeline` as input.
 
 ```python
->>> from transformers import HfEngine
+>>> from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TransformersEngine
+
+>>> model_name = "HuggingFaceTB/SmolLM-135M-Instruct"
+>>> tokenizer = AutoTokenizer.from_pretrained(model_name)
+>>> model = AutoModelForCausalLM.from_pretrained(model_name)
+
+>>> pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+>>> engine = TransformersEngine(pipe)
+>>> engine([{"role": "user", "content": "Ok!"}], stop_sequences=["great"])
+
+"What a "
+```
+
+[[autodoc]] TransformersEngine
+
+### HfApiEngine
+
+The `HfApiEngine` is an engine that wraps an [HF Inference API](https://huggingface.co/docs/api-inference/index) client for the execution of the LLM.
+
+```python
+>>> from transformers import HfApiEngine
 
 >>> messages = [
 ...   {"role": "user", "content": "Hello, how are you?"},
@@ -100,12 +121,12 @@ For convenience, we have added a `HfEngine` that implements the points above and
 ...   {"role": "user", "content": "No need to help, take it easy."},
 ... ]
 
->>> HfEngine()(messages, stop_sequences=["conversation"])
+>>> HfApiEngine()(messages, stop_sequences=["conversation"])
 
 "That's very kind of you to say! It's always nice to have a relaxed "
 ```
 
-[[autodoc]] HfEngine
+[[autodoc]] HfApiEngine
 
 
 ## Agent Types
diff --git a/docs/source/ko/main_classes/agent.md b/docs/source/ko/main_classes/agent.md
index 7e4a740907..d0ef630e2c 100644
--- a/docs/source/ko/main_classes/agent.md
+++ b/docs/source/ko/main_classes/agent.md
@@ -83,12 +83,12 @@ API나 기반 모델이 자주 업데이트되므로, 에이전트가 제공하
 1. 입력(`List[Dict[str, str]]`)에 대한 [메시지 형식](../chat_templating.md)을 따르고 문자열을 반환해야 합니다.
 2. 인수 `stop_sequences`에 시퀀스가 전달되기 *전에* 출력을 생성하는 것을 중지해야 합니다.
 
-### HfEngine [[hfengine]]
+### HfApiEngine [[HfApiEngine]]
 
-편의를 위해, 위의 사항을 구현하고 대규모 언어 모델 실행을 위해 추론 엔드포인트를 사용하는 `HfEngine`을 추가했습니다.
+편의를 위해, 위의 사항을 구현하고 대규모 언어 모델 실행을 위해 추론 엔드포인트를 사용하는 `HfApiEngine`을 추가했습니다.
 
 ```python
->>> from transformers import HfEngine
+>>> from transformers import HfApiEngine
 
 >>> messages = [
 ...   {"role": "user", "content": "Hello, how are you?"},
@@ -96,12 +96,12 @@ API나 기반 모델이 자주 업데이트되므로, 에이전트가 제공하
 ...   {"role": "user", "content": "No need to help, take it easy."},
 ... ]
 
->>> HfEngine()(messages, stop_sequences=["conversation"])
+>>> HfApiEngine()(messages, stop_sequences=["conversation"])
 
 "That's very kind of you to say! It's always nice to have a relaxed "
 ```
 
-[[autodoc]] HfEngine
+[[autodoc]] HfApiEngine
 
 
 ## 에이전트 유형 [[agent-types]]
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index 74870501f7..efbfb2bb93 100755
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -57,7 +57,7 @@ _import_structure = {
     "agents": [
         "Agent",
         "CodeAgent",
-        "HfEngine",
+        "HfApiEngine",
         "PipelineTool",
         "ReactAgent",
         "ReactCodeAgent",
@@ -65,6 +65,7 @@ _import_structure = {
         "Tool",
         "Toolbox",
         "ToolCollection",
+        "TransformersEngine",
         "launch_gradio_demo",
         "load_tool",
         "stream_to_gradio",
@@ -4806,7 +4807,7 @@ if TYPE_CHECKING:
     from .agents import (
         Agent,
         CodeAgent,
-        HfEngine,
+        HfApiEngine,
         PipelineTool,
         ReactAgent,
         ReactCodeAgent,
@@ -4814,6 +4815,7 @@ if TYPE_CHECKING:
         Tool,
         Toolbox,
         ToolCollection,
+        TransformersEngine,
         launch_gradio_demo,
         load_tool,
         stream_to_gradio,
diff --git a/src/transformers/agents/__init__.py b/src/transformers/agents/__init__.py
index c4de21a03d..f447d16580 100644
--- a/src/transformers/agents/__init__.py
+++ b/src/transformers/agents/__init__.py
@@ -25,7 +25,7 @@ from ..utils import (
 
 _import_structure = {
     "agents": ["Agent", "CodeAgent", "ReactAgent", "ReactCodeAgent", "ReactJsonAgent", "Toolbox"],
-    "llm_engine": ["HfEngine"],
+    "llm_engine": ["HfApiEngine", "TransformersEngine"],
     "monitoring": ["stream_to_gradio"],
     "tools": ["PipelineTool", "Tool", "ToolCollection", "launch_gradio_demo", "load_tool"],
 }
@@ -45,7 +45,7 @@ else:
 
 if TYPE_CHECKING:
     from .agents import Agent, CodeAgent, ReactAgent, ReactCodeAgent, ReactJsonAgent, Toolbox
-    from .llm_engine import HfEngine
+    from .llm_engine import HfApiEngine, TransformersEngine
     from .monitoring import stream_to_gradio
     from .tools import PipelineTool, Tool, ToolCollection, launch_gradio_demo, load_tool
 
diff --git a/src/transformers/agents/agents.py b/src/transformers/agents/agents.py
index 2f2316817b..8152b3213b 100644
--- a/src/transformers/agents/agents.py
+++ b/src/transformers/agents/agents.py
@@ -24,7 +24,7 @@ from ..utils import logging as transformers_logging
 from ..utils.import_utils import is_pygments_available
 from .agent_types import AgentAudio, AgentImage, AgentText
 from .default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool, setup_default_tools
-from .llm_engine import HfEngine, MessageRole
+from .llm_engine import HfApiEngine, MessageRole
 from .prompts import (
     DEFAULT_CODE_SYSTEM_PROMPT,
     DEFAULT_REACT_CODE_SYSTEM_PROMPT,
@@ -327,7 +327,7 @@ class Agent:
     def __init__(
         self,
         tools: Union[List[Tool], Toolbox],
-        llm_engine: Callable = HfEngine(),
+        llm_engine: Callable = HfApiEngine(),
         system_prompt=DEFAULT_REACT_CODE_SYSTEM_PROMPT,
         tool_description_template=None,
         additional_args={},
@@ -532,7 +532,7 @@ class CodeAgent(Agent):
     def __init__(
         self,
         tools: List[Tool],
-        llm_engine: Callable = HfEngine(),
+        llm_engine: Callable = HfApiEngine(),
         system_prompt: str = DEFAULT_CODE_SYSTEM_PROMPT,
         tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
         grammar: Dict[str, str] = None,
@@ -655,7 +655,7 @@ class ReactAgent(Agent):
     def __init__(
         self,
         tools: List[Tool],
-        llm_engine: Callable = HfEngine(),
+        llm_engine: Callable = HfApiEngine(),
         system_prompt: str = DEFAULT_REACT_CODE_SYSTEM_PROMPT,
         tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
         grammar: Dict[str, str] = None,
@@ -886,7 +886,7 @@ class ReactJsonAgent(ReactAgent):
     def __init__(
         self,
         tools: List[Tool],
-        llm_engine: Callable = HfEngine(),
+        llm_engine: Callable = HfApiEngine(),
         system_prompt: str = DEFAULT_REACT_JSON_SYSTEM_PROMPT,
         tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
         grammar: Dict[str, str] = None,
@@ -992,7 +992,7 @@ class ReactCodeAgent(ReactAgent):
     def __init__(
         self,
         tools: List[Tool],
-        llm_engine: Callable = HfEngine(),
+        llm_engine: Callable = HfApiEngine(),
         system_prompt: str = DEFAULT_REACT_CODE_SYSTEM_PROMPT,
         tool_description_template: str = DEFAULT_TOOL_DESCRIPTION_TEMPLATE,
         grammar: Dict[str, str] = None,
diff --git a/src/transformers/agents/llm_engine.py b/src/transformers/agents/llm_engine.py
index 09d6176b1e..5c36c2922f 100644
--- a/src/transformers/agents/llm_engine.py
+++ b/src/transformers/agents/llm_engine.py
@@ -20,6 +20,8 @@ from typing import Dict, List, Optional
 
 from huggingface_hub import InferenceClient
 
+from ..pipelines.base import Pipeline
+
 
 class MessageRole(str, Enum):
     USER = "user"
@@ -65,7 +67,9 @@ llama_role_conversions = {
 }
 
 
-class HfEngine:
+class HfApiEngine:
+    """This engine leverages Hugging Face's Inference API service, either serverless or with a dedicated endpoint."""
+
     def __init__(self, model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"):
         self.model = model
         self.client = InferenceClient(self.model, timeout=120)
@@ -93,6 +97,36 @@ class HfEngine:
         return response
 
 
+class TransformersEngine:
+    """This engine uses a pre-initialized local text-generation pipeline."""
+
+    def __init__(self, pipeline: Pipeline):
+        self.pipeline = pipeline
+
+    def __call__(
+        self, messages: List[Dict[str, str]], stop_sequences: Optional[List[str]] = None, grammar: Optional[str] = None
+    ) -> str:
+        # Get clean message list
+        messages = get_clean_message_list(messages, role_conversions=llama_role_conversions)
+
+        # Get LLM output
+        output = self.pipeline(
+            messages,
+            stop_strings=stop_sequences,
+            max_length=1500,
+            tokenizer=self.pipeline.tokenizer,
+        )
+
+        response = output[0]["generated_text"][-1]["content"]
+
+        # Remove stop sequences from LLM output
+        if stop_sequences is not None:
+            for stop_seq in stop_sequences:
+                if response[-len(stop_seq) :] == stop_seq:
+                    response = response[: -len(stop_seq)]
+        return response
+
+
 DEFAULT_JSONAGENT_REGEX_GRAMMAR = {
     "type": "regex",
     "value": 'Thought: .+?\\nAction:\\n\\{\\n\\s{4}"action":\\s"[^"\\n]+",\\n\\s{4}"action_input":\\s"[^"\\n]+"\\n\\}\\n<end_action>',