From 8023c95916355e7b61e7830f4185bca902c728b2 Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 12:22:24 +0800 Subject: [PATCH 01/10] [i18n] Add `zh` translation directory --- docs/source/zh/_config.py | 14 + docs/source/zh/_toctree.yml | 34 ++ .../zh/conceptual_guides/intro_agents.md | 118 ++++++ docs/source/zh/conceptual_guides/react.md | 47 +++ docs/source/zh/examples/multiagents.md | 199 ++++++++++ docs/source/zh/examples/rag.md | 156 ++++++++ docs/source/zh/examples/text_to_sql.md | 202 ++++++++++ docs/source/zh/guided_tour.md | 360 ++++++++++++++++++ docs/source/zh/index.md | 49 +++ docs/source/zh/reference/agents.md | 143 +++++++ docs/source/zh/reference/tools.md | 91 +++++ .../zh/tutorials/building_good_agents.md | 285 ++++++++++++++ .../zh/tutorials/secure_code_execution.md | 82 ++++ docs/source/zh/tutorials/tools.md | 222 +++++++++++ 14 files changed, 2002 insertions(+) create mode 100644 docs/source/zh/_config.py create mode 100644 docs/source/zh/_toctree.yml create mode 100644 docs/source/zh/conceptual_guides/intro_agents.md create mode 100644 docs/source/zh/conceptual_guides/react.md create mode 100644 docs/source/zh/examples/multiagents.md create mode 100644 docs/source/zh/examples/rag.md create mode 100644 docs/source/zh/examples/text_to_sql.md create mode 100644 docs/source/zh/guided_tour.md create mode 100644 docs/source/zh/index.md create mode 100644 docs/source/zh/reference/agents.md create mode 100644 docs/source/zh/reference/tools.md create mode 100644 docs/source/zh/tutorials/building_good_agents.md create mode 100644 docs/source/zh/tutorials/secure_code_execution.md create mode 100644 docs/source/zh/tutorials/tools.md diff --git a/docs/source/zh/_config.py b/docs/source/zh/_config.py new file mode 100644 index 000000000..81f6de049 --- /dev/null +++ b/docs/source/zh/_config.py @@ -0,0 +1,14 @@ +# docstyle-ignore +INSTALL_CONTENT = """ +# Installation +! 
pip install smolagents +# To install from source instead of the last release, comment the command above and uncomment the following one. +# ! pip install git+https://github.com/huggingface/smolagents.git +""" + +notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] +black_avoid_patterns = { + "{processor_class}": "FakeProcessorClass", + "{model_class}": "FakeModelClass", + "{object_class}": "FakeObjectClass", +} diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml new file mode 100644 index 000000000..bec73b788 --- /dev/null +++ b/docs/source/zh/_toctree.yml @@ -0,0 +1,34 @@ +- title: Get started + sections: + - local: index + title: 🤗 Agents + - local: guided_tour + title: Guided tour +- title: Tutorials + sections: + - local: tutorials/building_good_agents + title: ✨ Building good agents + - local: tutorials/tools + title: 🛠️ Tools - in-depth guide + - local: tutorials/secure_code_execution + title: 🛡️ Secure your code execution with E2B +- title: Conceptual guides + sections: + - local: conceptual_guides/intro_agents + title: 🤖 An introduction to agentic systems + - local: conceptual_guides/react + title: 🤔 How do Multi-step agents work? +- title: Examples + sections: + - local: examples/text_to_sql + title: Self-correcting Text-to-SQL + - local: examples/rag + title: Master your knowledge base with agentic RAG + - local: examples/multiagents + title: Orchestrate a multi-agent system +- title: Reference + sections: + - local: reference/agents + title: Agent-related objects + - local: reference/tools + title: Tool-related objects diff --git a/docs/source/zh/conceptual_guides/intro_agents.md b/docs/source/zh/conceptual_guides/intro_agents.md new file mode 100644 index 000000000..c233b39bb --- /dev/null +++ b/docs/source/zh/conceptual_guides/intro_agents.md @@ -0,0 +1,118 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Introduction to Agents + +## 🤔 What are agents? + +Any efficient system using AI will need to provide LLMs some kind of access to the real world: for instance the possibility to call a search tool to get external information, or to act on certain programs in order to solve a task. In other words, LLMs should have ***agency***. Agentic programs are the gateway to the outside world for LLMs. + +> [!TIP] +> AI Agents are **programs where LLM outputs control the workflow**. + +Any system leveraging LLMs will integrate the LLM outputs into code. The influence of the LLM's input on the code workflow is the level of agency of LLMs in the system. + +Note that with this definition, "agent" is not a discrete, 0 or 1 definition: instead, "agency" evolves on a continuous spectrum, as you give more or less power to the LLM on your workflow. 
+ +See in the table below how agency can vary across systems: + +| Agency Level | Description | How that's called | Example Pattern | +| ------------ | ------------------------------------------------------- | ----------------- | -------------------------------------------------- | +| ☆☆☆ | LLM output has no impact on program flow | Simple Processor | `process_llm_output(llm_response)` | +| ★☆☆ | LLM output determines an if/else switch | Router | `if llm_decision(): path_a() else: path_b()` | +| ★★☆ | LLM output determines function execution | Tool Caller | `run_function(llm_chosen_tool, llm_chosen_args)` | +| ★★★ | LLM output controls iteration and program continuation | Multi-step Agent | `while llm_should_continue(): execute_next_step()` | +| ★★★ | One agentic workflow can start another agentic workflow | Multi-Agent | `if llm_trigger(): execute_agent()` | + +The multi-step agent has this code structure: + +```python +memory = [user_defined_task] +while llm_should_continue(memory): # this loop is the multi-step part + action = llm_get_next_action(memory) # this is the tool-calling part + observations = execute_action(action) + memory += [action, observations] +``` + +This agentic system runs in a loop, executing a new action at each step (the action can involve calling some pre-determined *tools* that are just functions), until its observations make it apparent that a satisfactory state has been reached to solve the given task. Here’s an example of how a multi-step agent can solve a simple math question: + +<div class="flex justify-center"> + <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/Agent_ManimCE.gif"/> +</div> + + +## ✅ When to use agents / ⛔ when to avoid them + +Agents are useful when you need an LLM to determine the workflow of an app. But they’re often overkill. The question is: do I really need flexibility in the workflow to efficiently solve the task at hand? 
+If the pre-determined workflow falls short too often, that means you need more flexibility. +Let's take an example: say you're making an app that handles customer requests on a surfing trip website. + +You could know in advance that the requests will belong to either of 2 buckets (based on user choice), and you have a predefined workflow for each of these 2 cases. + +1. Wants some knowledge on the trips? ⇒ give them access to a search bar to search your knowledge base +2. Wants to talk to sales? ⇒ let them type in a contact form. + +If that deterministic workflow fits all queries, by all means just code everything! This will give you a 100% reliable system with no risk of error introduced by letting unpredictable LLMs meddle in your workflow. For the sake of simplicity and robustness, it's advised to regularize towards not using any agentic behaviour. + +But what if the workflow can't be determined that well in advance? + +For instance, a user wants to ask: `"I can come on Monday, but I forgot my passport so risk being delayed to Wednesday, is it possible to take me and my stuff to surf on Tuesday morning, with a cancellation insurance?"` This question hinges on many factors, and probably none of the predetermined criteria above will suffice for this request. + +If the pre-determined workflow falls short too often, that means you need more flexibility. + +That is where an agentic setup helps. + +In the above example, you could just make a multi-step agent that has access to a weather API for weather forecasts, Google Maps API to compute travel distance, an employee availability dashboard and a RAG system on your knowledge base. + +Until recently, computer programs were restricted to pre-determined workflows, trying to handle complexity by piling up if/else switches. They focused on extremely narrow tasks, like "compute the sum of these numbers" or "find the shortest path in this graph". 
But actually, most real-life tasks, like our trip example above, do not fit in pre-determined workflows. Agentic systems open up the vast world of real-world tasks to programs! + +## Why `smolagents`? + +For some low-level agentic use cases, like chains or routers, you can write all the code yourself. You'll be much better off that way, since it will let you control and understand your system better. + +But once you start going for more complicated behaviours like letting an LLM call a function (that's "tool calling") or letting an LLM run a while loop ("multi-step agent"), some abstractions become necessary: +- for tool calling, you need to parse the agent's output, so this output needs a predefined format like "Thought: I should call tool 'get_weather'. Action: get_weather(Paris).", that you parse with a predefined function, and the system prompt given to the LLM should notify it about this format. +- for a multi-step agent where the LLM output determines the loop, you need to give a different prompt to the LLM based on what happened in the last loop iteration: so you need some kind of memory. + +See? With these two examples, we already found the need for a few items to help us: + +- Of course, an LLM that acts as the engine powering the system +- A list of tools that the agent can access +- A parser that extracts tool calls from the LLM output +- A system prompt synced with the parser +- A memory + +But wait, since we give room to LLMs in decisions, surely they will make mistakes: so we need error logging and retry mechanisms. + +All these elements need tight coupling to make a well-functioning system. That's why we decided we needed to make basic building blocks to make all this stuff work together. + +## Code agents + +In a multi-step agent, at each step, the LLM can write an action, in the form of some calls to external tools. 
A common format (used by Anthropic, OpenAI, and many others) for writing these actions is generally different shades of "writing actions as a JSON of tool names and arguments to use, which you then parse to know which tool to execute and with which arguments". + +[Multiple](https://huggingface.co/papers/2402.01030) [research](https://huggingface.co/papers/2411.01747) [papers](https://huggingface.co/papers/2401.00812) have shown that having the tool calling LLMs in code is much better. + +The reason for this is simply that *we crafted our code languages specifically to be the best possible way to express actions performed by a computer*. If JSON snippets were a better expression, JSON would be the top programming language and programming would be hell on earth. + +The figure below, taken from [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030), illustrates some advantages of writing actions in code: + +<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/code_vs_json_actions.png"> + +Writing actions in code rather than JSON-like snippets provides better: + +- **Composability:** could you nest JSON actions within each other, or define a set of JSON actions to re-use later, the same way you could just define a python function? +- **Object management:** how do you store the output of an action like `generate_image` in JSON? +- **Generality:** code is built to express simply anything you can have a computer do. +- **Representation in LLM training data:** plenty of quality code actions are already included in LLMs’ training data which means they’re already trained for this! diff --git a/docs/source/zh/conceptual_guides/react.md b/docs/source/zh/conceptual_guides/react.md new file mode 100644 index 000000000..d85c9cad3 --- /dev/null +++ b/docs/source/zh/conceptual_guides/react.md @@ -0,0 +1,47 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# How do multi-step agents work? + +The ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) is currently the main approach to building agents. + +The name is based on the concatenation of two words, "Reason" and "Act." Indeed, agents following this architecture will solve their task in as many steps as needed, each step consisting of a Reasoning step, then an Action step where it formulates tool calls that will bring it closer to solving the task at hand. + +React process involves keeping a memory of past steps. + +> [!TIP] +> Read [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about multi-step agents. + +Here is a video overview of how that works: + +<div class="flex justify-center"> + <img + class="block dark:hidden" + src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/Agent_ManimCE.gif" + /> + <img + class="hidden dark:block" + src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/Agent_ManimCE.gif" + /> +</div> + + + +We implement two versions of ToolCallingAgent: +- [`ToolCallingAgent`] generates tool calls as a JSON in its output. 
+- [`CodeAgent`] is a new type of ToolCallingAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance. + +> [!TIP] +> We also provide an option to run agents in one-shot: just pass `single_step=True` when launching the agent, like `agent.run(your_task, single_step=True)` \ No newline at end of file diff --git a/docs/source/zh/examples/multiagents.md b/docs/source/zh/examples/multiagents.md new file mode 100644 index 000000000..4ea4e51b2 --- /dev/null +++ b/docs/source/zh/examples/multiagents.md @@ -0,0 +1,199 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. 
+ +--> +# Orchestrate a multi-agent system 🤖🤝🤖 + +[[open-in-colab]] + +In this notebook we will make a **multi-agent web browser: an agentic system with several agents collaborating to solve problems using the web!** + +It will be a simple hierarchy, using a `ManagedAgent` object to wrap the managed web search agent: + +``` + +----------------+ + | Manager agent | + +----------------+ + | + _______________|______________ + | | + Code interpreter +--------------------------------+ + tool | Managed agent | + | +------------------+ | + | | Web Search agent | | + | +------------------+ | + | | | | + | Web Search tool | | + | Visit webpage tool | + +--------------------------------+ +``` +Let's set up this system. + +Run the line below to install the required dependencies: + +``` +!pip install markdownify duckduckgo-search smolagents --upgrade -q +``` + +Let's log in in order to call the HF Inference API: + +```py +from huggingface_hub import notebook_login + +notebook_login() +``` + +⚡️ Our agent will be powered by [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) using `HfApiModel` class that uses HF's Inference API: the Inference API allows to quickly and easily run any OS model. + +_Note:_ The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models). + +```py +model_id = "Qwen/Qwen2.5-Coder-32B-Instruct" +``` + +## 🔍 Create a web search tool + +For web browsing, we can already use our pre-existing [`DuckDuckGoSearchTool`](https://github.com/huggingface/smolagents/blob/main/src/smolagents/default_tools/search.py) tool to provide a Google search equivalent. + +But then we will also need to be able to peek into the page found by the `DuckDuckGoSearchTool`. +To do so, we could import the library's built-in `VisitWebpageTool`, but we will build it again to see how it's done. 
+ +So let's create our `VisitWebpageTool` tool from scratch using `markdownify`. + +```py +import re +import requests +from markdownify import markdownify +from requests.exceptions import RequestException +from smolagents import tool + + +@tool +def visit_webpage(url: str) -> str: + """Visits a webpage at the given URL and returns its content as a markdown string. + + Args: + url: The URL of the webpage to visit. + + Returns: + The content of the webpage converted to Markdown, or an error message if the request fails. + """ + try: + # Send a GET request to the URL + response = requests.get(url) + response.raise_for_status() # Raise an exception for bad status codes + + # Convert the HTML content to Markdown + markdown_content = markdownify(response.text).strip() + + # Remove multiple line breaks + markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content) + + return markdown_content + + except RequestException as e: + return f"Error fetching the webpage: {str(e)}" + except Exception as e: + return f"An unexpected error occurred: {str(e)}" +``` + +Ok, now let's initialize and test our tool! + +```py +print(visit_webpage("https://en.wikipedia.org/wiki/Hugging_Face")[:500]) +``` + +## Build our multi-agent system 🤖🤝🤖 + +Now that we have all the tools `search` and `visit_webpage`, we can use them to create the web agent. + +Which configuration to choose for this agent? +- Web browsing is a single-timeline task that does not require parallel tool calls, so JSON tool calling works well for that. We thus choose a `ToolCallingAgent`. +- Also, since sometimes web search requires exploring many pages before finding the correct answer, we prefer to increase the number of `max_steps` to 10. 
+ +```py +from smolagents import ( + CodeAgent, + ToolCallingAgent, + HfApiModel, + ManagedAgent, + DuckDuckGoSearchTool, + LiteLLMModel, +) + +model = HfApiModel(model_id) + +web_agent = ToolCallingAgent( + tools=[DuckDuckGoSearchTool(), visit_webpage], + model=model, + max_steps=10, +) +``` + +We then wrap this agent into a `ManagedAgent` that will make it callable by its manager agent. + +```py +managed_web_agent = ManagedAgent( + agent=web_agent, + name="search", + description="Runs web searches for you. Give it your query as an argument.", +) +``` + +Finally we create a manager agent, and upon initialization we pass our managed agent to it in its `managed_agents` argument. + +Since this agent is the one tasked with the planning and thinking, advanced reasoning will be beneficial, so a `CodeAgent` will be the best choice. + +Also, we want to ask a question that involves the current year and does additional data calculations: so let us add `additional_authorized_imports=["time", "numpy", "pandas"]`, just in case the agent needs these packages. + +```py +manager_agent = CodeAgent( + tools=[], + model=model, + managed_agents=[managed_web_agent], + additional_authorized_imports=["time", "numpy", "pandas"], +) +``` + +That's all! Now let's run our system! We select a question that requires both some calculation and research: + +```py +answer = manager_agent.run("If LLM training continues to scale up at the current rhythm until 2030, what would be the electric power in GW required to power the biggest training runs by 2030? What would that correspond to, compared to some countries? Please provide a source for any numbers used.") +``` + +We get this report as the answer: +``` +Based on current growth projections and energy consumption estimates, if LLM trainings continue to scale up at the +current rhythm until 2030: + +1. 
The electric power required to power the biggest training runs by 2030 would be approximately 303.74 GW, which +translates to about 2,660,762 GWh/year. + +2. Comparing this to countries' electricity consumption: + - It would be equivalent to about 34% of China's total electricity consumption. + - It would exceed the total electricity consumption of India (184%), Russia (267%), and Japan (291%). + - It would be nearly 9 times the electricity consumption of countries like Italy or Mexico. + +3. Source of numbers: + - The initial estimate of 5 GW for future LLM training comes from AWS CEO Matt Garman. + - The growth projection used a CAGR of 79.80% from market research by Springs. + - Country electricity consumption data is from the U.S. Energy Information Administration, primarily for the year +2021. +``` + +Seems like we'll need some sizeable powerplants if the [scaling hypothesis](https://gwern.net/scaling-hypothesis) continues to hold true. + +Our agents managed to efficiently collaborate towards solving the task! ✅ + +💡 You can easily extend this orchestration to more agents: one does the code execution, one the web search, one handles file loadings... \ No newline at end of file diff --git a/docs/source/zh/examples/rag.md b/docs/source/zh/examples/rag.md new file mode 100644 index 000000000..acbdf14f6 --- /dev/null +++ b/docs/source/zh/examples/rag.md @@ -0,0 +1,156 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+ +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Agentic RAG + +[[open-in-colab]] + +Retrieval-Augmented-Generation (RAG) is “using an LLM to answer a user query, but basing the answer on information retrieved from a knowledge base”. It has many advantages over using a vanilla or fine-tuned LLM: to name a few, it allows to ground the answer on true facts and reduce confabulations, it allows to provide the LLM with domain-specific knowledge, and it allows fine-grained control of access to information from the knowledge base. + +But vanilla RAG has limitations, most importantly these two: +- It performs only one retrieval step: if the results are bad, the generation in turn will be bad. +- Semantic similarity is computed with the user query as a reference, which might be suboptimal: for instance, the user query will often be a question and the document containing the true answer will be in affirmative voice, so its similarity score will be downgraded compared to other source documents in the interrogative form, leading to a risk of missing the relevant information. + +We can alleviate these problems by making a RAG agent: very simply, an agent armed with a retriever tool! + +This agent will: ✅ Formulate the query itself and ✅ Critique to re-retrieve if needed. + +So it should naively recover some advanced RAG techniques! +- Instead of directly using the user query as the reference in semantic search, the agent formulates itself a reference sentence that can be closer to the targeted documents, as in [HyDE](https://huggingface.co/papers/2212.10496). +The agent can use the generated snippets and re-retrieve if needed, as in [Self-Query](https://docs.llamaindex.ai/en/stable/examples/evaluation/RetryQuery/). + +Let's build this system. 
🛠️ + +Run the line below to install required dependencies: +```bash +!pip install smolagents pandas langchain langchain-community sentence-transformers rank_bm25 --upgrade -q +``` +To call the HF Inference API, you will need a valid token as your environment variable `HF_TOKEN`. +We use python-dotenv to load it. +```py +from dotenv import load_dotenv +load_dotenv() +``` + +We first load a knowledge base on which we want to perform RAG: this dataset is a compilation of the documentation pages for many Hugging Face libraries, stored as markdown. We will keep only the documentation for the `transformers` library. + +Then prepare the knowledge base by processing the dataset and storing it into a vector database to be used by the retriever. + +We use [LangChain](https://python.langchain.com/docs/introduction/) for its excellent vector database utilities. + +```py +import datasets +from langchain.docstore.document import Document +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.retrievers import BM25Retriever + +knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train") +knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers")) + +source_docs = [ + Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) + for doc in knowledge_base +] + +text_splitter = RecursiveCharacterTextSplitter( + chunk_size=500, + chunk_overlap=50, + add_start_index=True, + strip_whitespace=True, + separators=["\n\n", "\n", ".", " ", ""], +) +docs_processed = text_splitter.split_documents(source_docs) +``` + +Now the documents are ready. + +So let’s build our agentic RAG system! + +👉 We only need a RetrieverTool that our agent can leverage to retrieve information from the knowledge base. 
+ +Since we need to add a vectordb as an attribute of the tool, we cannot simply use the simple tool constructor with a `@tool` decorator: so we will follow the advanced setup highlighted in the [tools tutorial](../tutorials/tools). + +```py +from smolagents import Tool + +class RetrieverTool(Tool): + name = "retriever" + description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query." + inputs = { + "query": { + "type": "string", + "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.", + } + } + output_type = "string" + + def __init__(self, docs, **kwargs): + super().__init__(**kwargs) + self.retriever = BM25Retriever.from_documents( + docs, k=10 + ) + + def forward(self, query: str) -> str: + assert isinstance(query, str), "Your search query must be a string" + + docs = self.retriever.invoke( + query, + ) + return "\nRetrieved documents:\n" + "".join( + [ + f"\n\n===== Document {str(i)} =====\n" + doc.page_content + for i, doc in enumerate(docs) + ] + ) + +retriever_tool = RetrieverTool(docs_processed) +``` +We have used BM25, a classic retrieval method, because it's lightning fast to set up. +To improve retrieval accuracy, you could replace BM25 with semantic search using vector representations for documents: thus you can head to the [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) to select a good embedding model. + +Now it’s straightforward to create an agent that leverages this `retriever_tool`! + +The agent will need these arguments upon initialization: +- `tools`: a list of tools that the agent will be able to call. +- `model`: the LLM that powers the agent. +Our `model` must be a callable that takes as input a list of messages and returns text. It also needs to accept a stop_sequences argument that indicates when to stop its generation. 
For convenience, we directly use the `HfApiModel` class provided in the package to get an LLM engine that calls Hugging Face's Inference API. + +And we use [meta-llama/Llama-3.3-70B-Instruct](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) as the llm engine because: +- It has a long 128k context, which is helpful for processing long source documents +- It is served for free at all times on HF's Inference API! + +_Note:_ The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models). + +```py +from smolagents import HfApiModel, CodeAgent + +agent = CodeAgent( + tools=[retriever_tool], model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbose=True +) +``` + +Upon initializing the CodeAgent, it has been automatically given a default system prompt that tells the LLM engine to process step-by-step and generate tool calls as code snippets, but you could replace this prompt template with your own as needed. + +Then when its `.run()` method is launched, the agent takes care of calling the LLM engine, and executing the tool calls, all in a loop that ends only when tool `final_answer` is called with the final answer as its argument. + +```py +agent_output = agent.run("For a transformers model training, which is slower, the forward or the backward pass?") + +print("Final output:") +print(agent_output) +``` + + + diff --git a/docs/source/zh/examples/text_to_sql.md b/docs/source/zh/examples/text_to_sql.md new file mode 100644 index 000000000..12d0c5e47 --- /dev/null +++ b/docs/source/zh/examples/text_to_sql.md @@ -0,0 +1,202 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Text-to-SQL + +[[open-in-colab]] + +In this tutorial, we’ll see how to implement an agent that leverages SQL using `smolagents`. + +> Let's start with the golden question: why not keep it simple and use a standard text-to-SQL pipeline? + +A standard text-to-sql pipeline is brittle, since the generated SQL query can be incorrect. Even worse, the query could be incorrect, but not raise an error, instead giving some incorrect/useless outputs without raising an alarm. + +👉 Instead, an agent system is able to critically inspect outputs and decide if the query needs to be changed or not, thus giving it a huge performance boost. + +Let’s build this agent! 
💪 + +First, we setup the SQL environment: +```py +from sqlalchemy import ( + create_engine, + MetaData, + Table, + Column, + String, + Integer, + Float, + insert, + inspect, + text, +) + +engine = create_engine("sqlite:///:memory:") +metadata_obj = MetaData() + +# create city SQL table +table_name = "receipts" +receipts = Table( + table_name, + metadata_obj, + Column("receipt_id", Integer, primary_key=True), + Column("customer_name", String(16), primary_key=True), + Column("price", Float), + Column("tip", Float), +) +metadata_obj.create_all(engine) + +rows = [ + {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20}, + {"receipt_id": 2, "customer_name": "Alex Mason", "price": 23.86, "tip": 0.24}, + {"receipt_id": 3, "customer_name": "Woodrow Wilson", "price": 53.43, "tip": 5.43}, + {"receipt_id": 4, "customer_name": "Margaret James", "price": 21.11, "tip": 1.00}, +] +for row in rows: + stmt = insert(receipts).values(**row) + with engine.begin() as connection: + cursor = connection.execute(stmt) +``` + +### Build our agent + +Now let’s make our SQL table retrievable by a tool. + +The tool’s description attribute will be embedded in the LLM’s prompt by the agent system: it gives the LLM information about how to use the tool. This is where we want to describe the SQL table. + +```py +inspector = inspect(engine) +columns_info = [(col["name"], col["type"]) for col in inspector.get_columns("receipts")] + +table_description = "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info]) +print(table_description) +``` + +```text +Columns: + - receipt_id: INTEGER + - customer_name: VARCHAR(16) + - price: FLOAT + - tip: FLOAT +``` + +Now let’s build our tool. It needs the following: (read [the tool doc](../tutorials/tools) for more detail) +- A docstring with an `Args:` part listing arguments. +- Type hints on both inputs and output. 
+ +```py +from smolagents import tool + +@tool +def sql_engine(query: str) -> str: + """ + Allows you to perform SQL queries on the table. Returns a string representation of the result. + The table is named 'receipts'. Its description is as follows: + Columns: + - receipt_id: INTEGER + - customer_name: VARCHAR(16) + - price: FLOAT + - tip: FLOAT + + Args: + query: The query to perform. This should be correct SQL. + """ + output = "" + with engine.connect() as con: + rows = con.execute(text(query)) + for row in rows: + output += "\n" + str(row) + return output +``` + +Now let us create an agent that leverages this tool. + +We use the `CodeAgent`, which is smolagents’ main agent class: an agent that writes actions in code and can iterate on previous output according to the ReAct framework. + +The model is the LLM that powers the agent system. HfApiModel allows you to call LLMs using HF’s Inference API, either via Serverless or Dedicated endpoint, but you could also use any proprietary API. + +```py +from smolagents import CodeAgent, HfApiModel + +agent = CodeAgent( + tools=[sql_engine], + model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"), +) +agent.run("Can you give me the name of the client who got the most expensive receipt?") +``` + +### Level 2: Table joins + +Now let’s make it more challenging! We want our agent to handle joins across multiple tables. + +So let’s make a second table recording the names of waiters for each receipt_id! 
+ +```py +table_name = "waiters" +receipts = Table( + table_name, + metadata_obj, + Column("receipt_id", Integer, primary_key=True), + Column("waiter_name", String(16), primary_key=True), +) +metadata_obj.create_all(engine) + +rows = [ + {"receipt_id": 1, "waiter_name": "Corey Johnson"}, + {"receipt_id": 2, "waiter_name": "Michael Watts"}, + {"receipt_id": 3, "waiter_name": "Michael Watts"}, + {"receipt_id": 4, "waiter_name": "Margaret James"}, +] +for row in rows: + stmt = insert(receipts).values(**row) + with engine.begin() as connection: + cursor = connection.execute(stmt) +``` +Since we added a table, we update the description of the `sql_engine` tool with this table’s description to let the LLM properly leverage information from this table. + +```py +updated_description = """Allows you to perform SQL queries on the table. Beware that this tool's output is a string representation of the execution output. +It can use the following tables:""" + +inspector = inspect(engine) +for table in ["receipts", "waiters"]: + columns_info = [(col["name"], col["type"]) for col in inspector.get_columns(table)] + + table_description = f"Table '{table}':\n" + + table_description += "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info]) + updated_description += "\n\n" + table_description + +print(updated_description) +``` +Since this request is a bit harder than the previous one, we’ll switch the LLM engine to use the more powerful [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)! + +```py +sql_engine.description = updated_description + +agent = CodeAgent( + tools=[sql_engine], + model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct"), +) + +agent.run("Which waiter got more total money from tips?") +``` +It works right away! The setup was surprisingly simple, wasn’t it? + +This example is done! We've touched upon these concepts: +- Building new tools. +- Updating a tool's description.
+- Switching to a stronger LLM helps agent reasoning. + +✅ Now you can go build this text-to-SQL system you’ve always dreamt of! ✨ \ No newline at end of file diff --git a/docs/source/zh/guided_tour.md b/docs/source/zh/guided_tour.md new file mode 100644 index 000000000..4e2fe44d4 --- /dev/null +++ b/docs/source/zh/guided_tour.md @@ -0,0 +1,360 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Agents - Guided tour + +[[open-in-colab]] + +In this guided visit, you will learn how to build an agent, how to run it, and how to customize it to make it work better for your use-case. + +### Building your agent + +To initialize a minimal agent, you need at least these two arguments: + +- `model`, a text-generation model to power your agent - because the agent is different from a simple LLM, it is a system that uses a LLM as its engine. You can use any of these options: + - [`TransformersModel`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`. + - [`HfApiModel`] leverages a `huggingface_hub.InferenceClient` under the hood. + - [`LiteLLMModel`] lets you call 100+ different models through [LiteLLM](https://docs.litellm.ai/)! + +- `tools`, A list of `Tools` that the agent can use to solve the task. 
It can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`. + +Once you have these two arguments, `tools` and `model`, you can create an agent and run it. You can use any LLM you'd like, either through [Hugging Face API](https://huggingface.co/docs/api-inference/en/index), [transformers](https://github.com/huggingface/transformers/), [ollama](https://ollama.com/), or [LiteLLM](https://www.litellm.ai/). + +<hfoptions id="Pick a LLM"> +<hfoption id="Hugging Face API"> + +The Hugging Face API is free to use without a token, but it will then be rate-limited. + +To access gated models or raise your rate limits with a PRO account, you need to set the environment variable `HF_TOKEN` or pass the `token` argument upon initialization of `HfApiModel`. + +```python +from smolagents import CodeAgent, HfApiModel + +model_id = "meta-llama/Llama-3.3-70B-Instruct" + +model = HfApiModel(model_id=model_id, token="<YOUR_HUGGINGFACEHUB_API_TOKEN>") +agent = CodeAgent(tools=[], model=model, add_base_tools=True) + +agent.run( + "Could you give me the 118th number in the Fibonacci sequence?", +) +``` +</hfoption> +<hfoption id="Local Transformers Model"> + +```python +from smolagents import CodeAgent, TransformersModel + +model_id = "meta-llama/Llama-3.2-3B-Instruct" + +model = TransformersModel(model_id=model_id) +agent = CodeAgent(tools=[], model=model, add_base_tools=True) + +agent.run( + "Could you give me the 118th number in the Fibonacci sequence?", +) +``` +</hfoption> +<hfoption id="OpenAI or Anthropic API"> + +To use `LiteLLMModel`, you need to set the environment variable `ANTHROPIC_API_KEY` or `OPENAI_API_KEY`, or pass the `api_key` argument upon initialization.
+ +```python +from smolagents import CodeAgent, LiteLLMModel + +model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", api_key="YOUR_ANTHROPIC_API_KEY") # Could use 'gpt-4o' +agent = CodeAgent(tools=[], model=model, add_base_tools=True) + +agent.run( + "Could you give me the 118th number in the Fibonacci sequence?", +) +``` +</hfoption> +<hfoption id="Ollama"> + +```python +from smolagents import CodeAgent, LiteLLMModel + +model = LiteLLMModel( + model_id="ollama_chat/llama3.2", # This model is a bit weak for agentic behaviours though + api_base="http://localhost:11434", # replace with remote open-ai compatible server if necessary + api_key="YOUR_API_KEY" # replace with API key if necessary +) + +agent = CodeAgent(tools=[], model=model, add_base_tools=True) + +agent.run( + "Could you give me the 118th number in the Fibonacci sequence?", +) +``` +</hfoption> +</hfoptions> + +#### CodeAgent and ToolCallingAgent + +The [`CodeAgent`] is our default agent. It will write and execute python code snippets at each step. + +By default, the execution is done in your local environment. +This should be safe because the only functions that can be called are the tools you provided (especially if it's only tools by Hugging Face) and a set of predefined safe functions like `print` or functions from the `math` module, so you're already limited in what can be executed. + +The Python interpreter also doesn't allow imports by default outside of a safe list, so all the most obvious attacks shouldn't be an issue. 
+You can authorize additional imports by passing the authorized modules as a list of strings in argument `additional_authorized_imports` upon initialization of your [`CodeAgent`]: + +```py +from smolagents import CodeAgent + +agent = CodeAgent(tools=[], model=model, additional_authorized_imports=['requests', 'bs4']) +agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") +``` + +> [!WARNING] +> The LLM can generate arbitrary code that will then be executed: do not add any unsafe imports! + +The execution will stop at any code trying to perform an illegal operation or if there is a regular Python error with the code generated by the agent. + +You can also use [E2B code executor](https://e2b.dev/docs#what-is-e2-b) instead of a local Python interpreter by first [setting the `E2B_API_KEY` environment variable](https://e2b.dev/dashboard?tab=keys) and then passing `use_e2b_executor=True` upon agent initialization. + +> [!TIP] +> Learn more about code execution [in this tutorial](tutorials/secure_code_execution). + +We also support the widely-used way of writing actions as JSON-like blobs: this is [`ToolCallingAgent`], which works in much the same way as [`CodeAgent`], of course without `additional_authorized_imports` since it doesn't execute code: + +```py +from smolagents import ToolCallingAgent + +agent = ToolCallingAgent(tools=[], model=model) +agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") +``` + +### Inspecting an agent run + +Here are a few useful attributes to inspect what happened after a run: +- `agent.logs` stores the fine-grained logs of the agent. At every step of the agent's run, everything gets stored in a dictionary that then is appended to `agent.logs`. +- Running `agent.write_inner_memory_from_logs()` creates an inner memory of the agent's logs for the LLM to view, as a list of chat messages.
This method goes over each step of the log and only stores what it's interested in as a message: for instance, it will save the system prompt and task in separate messages, then for each step it will store the LLM output as a message, and the tool call output as another message. Use this if you want a higher-level view of what has happened - but not every log will be transcribed by this method. + +## Tools + +A tool is an atomic function to be used by an agent. To be used by an LLM, it also needs a few attributes that constitute its API and will be used to describe to the LLM how to call this tool: +- A name +- A description +- Input types and descriptions +- An output type + +You can for instance check the [`PythonInterpreterTool`]: it has a name, a description, input descriptions, an output type, and a `forward` method to perform the action. + +When the agent is initialized, the tool attributes are used to generate a tool description which is baked into the agent's system prompt. This lets the agent know which tools it can use and why. + +### Default toolbox + +`smolagents` comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools = True`: + +- **DuckDuckGo web search**: performs a web search using the DuckDuckGo search engine. +- **Python code interpreter**: runs your LLM-generated Python code in a secure environment. This tool will only be added to [`ToolCallingAgent`] if you initialize it with `add_base_tools=True`, since code-based agents can already natively execute Python code. +- **Transcriber**: a speech-to-text pipeline built on Whisper-Turbo that transcribes audio to text. + +You can manually use a tool by calling the [`load_tool`] function and a task to perform.
+ +```python +from smolagents import load_tool + +search_tool = load_tool("web_search") +print(search_tool("Who's the current president of Russia?")) +``` + +### Create a new tool + +You can create your own tool for use cases not covered by the default tools from Hugging Face. +For example, let's create a tool that returns the most downloaded model for a given task from the Hub. + +You'll start with the code below. + +```python +from huggingface_hub import list_models + +task = "text-classification" + +most_downloaded_model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) +print(most_downloaded_model.id) +``` + +This code can quickly be converted into a tool, just by wrapping it in a function and adding the `tool` decorator: +This is not the only way to build the tool: you can directly define it as a subclass of [`Tool`], which gives you more flexibility, for instance the possibility to initialize heavy class attributes. + +Let's see how it works for both options: + +<hfoptions id="build-a-tool"> +<hfoption id="Decorate a function with @tool"> + +```py +from smolagents import tool + +@tool +def model_download_tool(task: str) -> str: + """ + This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. + It returns the name of the checkpoint. + + Args: + task: The task for which to get the download count. + """ + most_downloaded_model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) + return most_downloaded_model.id +``` + +The function needs: +- A clear name. The name should be descriptive enough of what this tool does to help the LLM brain powering the agent. Since this tool returns the model with the most downloads for a task, let's name it `model_download_tool`. +- Type hints on both inputs and output +- A description, that includes an 'Args:' part where each argument is described (without a type indication this time, it will be pulled from the type hint). 
Same as for the tool name, this description is an instruction manual for the LLM powering your agent, so do not neglect it. +All these elements will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible! + +> [!TIP] +> This definition format is the same as tool schemas used in `apply_chat_template`, the only difference is the added `tool` decorator: read more on our tool use API [here](https://huggingface.co/blog/unified-tool-use#passing-tools-to-a-chat-template). +</hfoption> +<hfoption id="Subclass Tool"> + +```py +from smolagents import Tool + +class ModelDownloadTool(Tool): + name = "model_download_tool" + description = "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. It returns the name of the checkpoint." + inputs = {"task": {"type": "string", "description": "The task for which to get the download count."}} + output_type = "string" + + def forward(self, task: str) -> str: + most_downloaded_model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) + return most_downloaded_model.id +``` + +The subclass needs the following attributes: +- A clear `name`. The name should be descriptive enough of what this tool does to help the LLM brain powering the agent. Since this tool returns the model with the most downloads for a task, let's name it `model_download_tool`. +- A `description`. Same as for the `name`, this description is an instruction manual for the LLM powering your agent, so do not neglect it. +- Input types and descriptions +- Output type +All these attributes will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible!
+</hfoption> +</hfoptions> + + +Then you can directly initialize your agent: +```py +from smolagents import CodeAgent, HfApiModel +agent = CodeAgent(tools=[model_download_tool], model=HfApiModel()) +agent.run( + "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?" +) +``` + +You get the following logs: +```text +╭──────────────────────────────────────── New run ─────────────────────────────────────────╮ +│ │ +│ Can you give me the name of the model that has the most downloads in the 'text-to-video' │ +│ task on the Hugging Face Hub? │ +│ │ +╰─ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct ───────────────────────────────────────────╯ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 0 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮ +│ 1 model_name = model_download_tool(task="text-to-video") │ +│ 2 print(model_name) │ +╰──────────────────────────────────────────────────────────────────────────────────────────╯ +Execution logs: +ByteDance/AnimateDiff-Lightning + +Out: None +[Step 0: Duration 0.27 seconds| Input tokens: 2,069 | Output tokens: 60] +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮ +│ 1 final_answer("ByteDance/AnimateDiff-Lightning") │ +╰──────────────────────────────────────────────────────────────────────────────────────────╯ +Out - Final answer: ByteDance/AnimateDiff-Lightning +[Step 1: Duration 0.10 seconds| Input tokens: 4,288 | Output tokens: 148] +Out[20]: 'ByteDance/AnimateDiff-Lightning' +``` + +> [!TIP] +> Read more on tools in the [dedicated tutorial](./tutorials/tools#what-is-a-tool-and-how-to-build-one). + +## Multi-agents + +Multi-agent systems have been introduced with Microsoft's framework [Autogen](https://huggingface.co/papers/2308.08155). 
+ +In this type of framework, you have several agents working together to solve your task instead of only one. +It empirically yields better performance on most benchmarks. The reason for this better performance is conceptually simple: for many tasks, rather than using a do-it-all system, you would prefer to specialize units on sub-tasks. Here, having agents with separate tool sets and memories allows to achieve efficient specialization. For instance, why fill the memory of the code generating agent with all the content of webpages visited by the web search agent? It's better to keep them separate. + +You can easily build hierarchical multi-agent systems with `smolagents`. + +To do so, encapsulate the agent in a [`ManagedAgent`] object. This object needs arguments `agent`, `name`, and a `description`, which will then be embedded in the manager agent's system prompt to let it know how to call this managed agent, as we also do for tools. + +Here's an example of making an agent that managed a specific web search agent using our [`DuckDuckGoSearchTool`]: + +```py +from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, ManagedAgent + +model = HfApiModel() + +web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) + +managed_web_agent = ManagedAgent( + agent=web_agent, + name="web_search", + description="Runs web searches for you. Give it your query as an argument." +) + +manager_agent = CodeAgent( + tools=[], model=model, managed_agents=[managed_web_agent] +) + +manager_agent.run("Who is the CEO of Hugging Face?") +``` + +> [!TIP] +> For an in-depth example of an efficient multi-agent implementation, see [how we pushed our multi-agent system to the top of the GAIA leaderboard](https://huggingface.co/blog/beating-gaia). 
+ + +## Talk with your agent and visualize its thoughts in a cool Gradio interface + +You can use `GradioUI` to interactively submit tasks to your agent and observe its thought and execution process, here is an example: + +```py +from smolagents import ( + load_tool, + CodeAgent, + HfApiModel, + GradioUI +) + +# Import tool from Hub +image_generation_tool = load_tool("m-ric/text-to-image") + +model = HfApiModel(model_id) + +# Initialize the agent with the image generation tool +agent = CodeAgent(tools=[image_generation_tool], model=model) + +GradioUI(agent).launch() +``` + +Under the hood, when the user types a new answer, the agent is launched with `agent.run(user_request, reset=False)`. +The `reset=False` flag means the agent's memory is not flushed before launching this new task, which lets the conversation go on. + +You can also use this `reset=False` argument to keep the conversation going in any other agentic application. + +## Next steps + +For more in-depth usage, you will then want to check out our tutorials: +- [the explanation of how our code agents work](./tutorials/secure_code_execution) +- [this guide on how to build good agents](./tutorials/building_good_agents). +- [the in-depth guide for tool usage](./tutorials/tools). diff --git a/docs/source/zh/index.md b/docs/source/zh/index.md new file mode 100644 index 000000000..7392cfc4a --- /dev/null +++ b/docs/source/zh/index.md @@ -0,0 +1,49 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. +--> + +# `smolagents` + +This library is the simplest framework out there to build powerful agents! By the way, wtf are "agents"? We provide our definition [on this page](conceptual_guides/intro_agents), where you'll also find tips for when to use them or not (spoilers: you'll often be better off without agents). + +This library offers: + +✨ **Simplicity**: the logic for agents fits in ~thousand lines of code. We kept abstractions to their minimal shape above raw code! + +🌐 **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through our inference API, but also models from OpenAI, Anthropic... it's really easy to power an agent with any LLM. + +🧑💻 **First-class support for Code Agents**, i.e. agents that write their actions in code (as opposed to "agents being used to write code"), [read more here](tutorials/secure_code_execution). + +🤗 **Hub integrations**: you can share and load tools to/from the Hub, and more is to come! + +<div class="mt-10"> + <div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5"> + <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./guided_tour" + ><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Guided tour</div> + <p class="text-gray-700">Learn the basics and become familiar with using Agents.
Start here if you are using Agents for the first time!</p> + </a> + <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./examples/text_to_sql" + ><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">How-to guides</div> + <p class="text-gray-700">Practical guides to help you achieve a specific goal: create an agent to generate and test SQL queries!</p> + </a> + <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./conceptual_guides/intro_agents" + ><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Conceptual guides</div> + <p class="text-gray-700">High-level explanations for building a better understanding of important topics.</p> + </a> + <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./tutorials/building_good_agents" + ><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Tutorials</div> + <p class="text-gray-700">Horizontal tutorials that cover important aspects of building agents.</p> + </a> + </div> +</div> diff --git a/docs/source/zh/reference/agents.md b/docs/source/zh/reference/agents.md new file mode 100644 index 000000000..9cdca7d0b --- /dev/null +++ b/docs/source/zh/reference/agents.md @@ -0,0 +1,143 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Agents + +<Tip warning={true}> + +Smolagents is an experimental API which is subject to change at any time. Results returned by the agents +can vary as the APIs or underlying models are prone to change. + +</Tip> + +To learn more about agents and tools make sure to read the [introductory guide](../index). This page +contains the API docs for the underlying classes. + +## Agents + +Our agents inherit from [`MultiStepAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react). + +We provide two types of agents, based on the main [`Agent`] class. + - [`CodeAgent`] is the default agent, it writes its tool calls in Python code. + - [`ToolCallingAgent`] writes its tool calls in JSON. + +Both require arguments `model` and list of tools `tools` at initialization. + + +### Classes of agents + +[[autodoc]] MultiStepAgent + +[[autodoc]] CodeAgent + +[[autodoc]] ToolCallingAgent + + +### ManagedAgent + +[[autodoc]] ManagedAgent + +### stream_to_gradio + +[[autodoc]] stream_to_gradio + +### GradioUI + +[[autodoc]] GradioUI + +## Models + +You're free to create and use your own models to power your agent. + +You could use any `model` callable for your agent, as long as: +1. 
It follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`. +2. It stops generating outputs *before* the sequences passed in the argument `stop_sequences` + +For defining your LLM, you can make a `custom_model` method which accepts a list of [messages](./chat_templating) and returns text. This callable also needs to accept a `stop_sequences` argument that indicates when to stop generating. + +```python +from huggingface_hub import login, InferenceClient + +login("<YOUR_HUGGINGFACEHUB_API_TOKEN>") + +model_id = "meta-llama/Llama-3.3-70B-Instruct" + +client = InferenceClient(model=model_id) + +def custom_model(messages, stop_sequences=["Task"]) -> str: + response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000) + answer = response.choices[0].message.content + return answer +``` + +Additionally, `custom_model` can also take a `grammar` argument. In the case where you specify a `grammar` upon agent initialization, this argument will be passed to the calls to model, with the `grammar` that you defined upon initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) in order to force properly-formatted agent outputs. + +### TransformersModel + +For convenience, we have added a `TransformersModel` that implements the points above by building a local `transformers` pipeline for the model_id given at initialization. + +```python +from smolagents import TransformersModel + +model = TransformersModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct") + +print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"])) +``` +```text +>>> What a +``` + +[[autodoc]] TransformersModel + +### HfApiModel + +The `HfApiModel` wraps an [HF Inference API](https://huggingface.co/docs/api-inference/index) client for the execution of the LLM. 
+ +```python +from smolagents import HfApiModel + +messages = [ + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing great. How can I help you today?"}, + {"role": "user", "content": "No need to help, take it easy."}, +] + +model = HfApiModel() +print(model(messages)) +``` +```text +>>> Of course! If you change your mind, feel free to reach out. Take care! +``` +[[autodoc]] HfApiModel + +### LiteLLMModel + +The `LiteLLMModel` leverages [LiteLLM](https://www.litellm.ai/) to support 100+ LLMs from various providers. +You can pass kwargs upon model initialization that will then be used whenever using the model, for instance below we pass `temperature`. + +```python +from smolagents import LiteLLMModel + +messages = [ + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing great. How can I help you today?"}, + {"role": "user", "content": "No need to help, take it easy."}, +] + +model = LiteLLMModel("anthropic/claude-3-5-sonnet-latest", temperature=0.2) +print(model(messages, max_tokens=10)) +``` + +[[autodoc]] LiteLLMModel \ No newline at end of file diff --git a/docs/source/zh/reference/tools.md b/docs/source/zh/reference/tools.md new file mode 100644 index 000000000..022ad35d2 --- /dev/null +++ b/docs/source/zh/reference/tools.md @@ -0,0 +1,91 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+ +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Tools + +<Tip warning={true}> + +Smolagents is an experimental API which is subject to change at any time. Results returned by the agents +can vary as the APIs or underlying models are prone to change. + +</Tip> + +To learn more about agents and tools make sure to read the [introductory guide](../index). This page +contains the API docs for the underlying classes. + +## Tools + +### load_tool + +[[autodoc]] load_tool + +### tool + +[[autodoc]] tool + +### Tool + +[[autodoc]] Tool + +### launch_gradio_demo + +[[autodoc]] launch_gradio_demo + +## Default tools + +### PythonInterpreterTool + +[[autodoc]] PythonInterpreterTool + +### DuckDuckGoSearchTool + +[[autodoc]] DuckDuckGoSearchTool + +### VisitWebpageTool + +[[autodoc]] VisitWebpageTool + +## ToolCollection + +[[autodoc]] ToolCollection + +## Agent Types + +Agents can handle any type of object in-between tools; tools, being completely multimodal, can accept and return +text, image, audio, video, among other types. In order to increase compatibility between tools, as well as to +correctly render these returns in ipython (jupyter, colab, ipython notebooks, ...), we implement wrapper classes +around these types. + +The wrapped objects should continue behaving as initially; a text object should still behave as a string, an image +object should still behave as a `PIL.Image`. 
+ +These types have three specific purposes: + +- Calling `to_raw` on the type should return the underlying object +- Calling `to_string` on the type should return the object as a string: that can be the string in case of an `AgentText` + but will be the path of the serialized version of the object in other instances +- Displaying it in an ipython kernel should display the object correctly + +### AgentText + +[[autodoc]] smolagents.types.AgentText + +### AgentImage + +[[autodoc]] smolagents.types.AgentImage + +### AgentAudio + +[[autodoc]] smolagents.types.AgentAudio diff --git a/docs/source/zh/tutorials/building_good_agents.md b/docs/source/zh/tutorials/building_good_agents.md new file mode 100644 index 000000000..de8cd3ad8 --- /dev/null +++ b/docs/source/zh/tutorials/building_good_agents.md @@ -0,0 +1,285 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Building good agents + +[[open-in-colab]] + +There's a world of difference between building an agent that works and one that doesn't. +How can we build agents that fall into the former category? +In this guide, we're going to see best practices for building agents. 
+ +> [!TIP] +> If you're new to building agents, make sure to first read the [intro to agents](../conceptual_guides/intro_agents) and the [guided tour of smolagents](../guided_tour). + +### The best agentic systems are the simplest: simplify the workflow as much as you can + +Giving an LLM some agency in your workflow introduces some risk of errors. + +Well-programmed agentic systems have good error logging and retry mechanisms anyway, so the LLM engine has a chance to self-correct its mistake. But to reduce the risk of LLM error as much as possible, you should simplify your workflow! + +Let's revisit the example from [intro_agents]: a bot that answers user queries for a surf trip company. +Instead of letting the agent do 2 different calls for "travel distance API" and "weather API" each time it is asked about a new surf spot, you could just make one unified tool "return_spot_information", a function that calls both APIs at once and returns their concatenated outputs to the user. + +This will reduce costs, latency, and error risk! + +The main guideline is: Reduce the number of LLM calls as much as you can. + +This leads to a few takeaways: +- Whenever possible, group 2 tools in one, like in our example of the two APIs. +- Whenever possible, logic should be based on deterministic functions rather than agentic decisions. + +### Improve the information flow to the LLM engine + +Remember that your LLM engine is like a ~intelligent~ robot, trapped in a room with the only communication with the outside world being notes passed under a door. + +It won't know of anything that happened if you don't explicitly put that into its prompt. + +So first start with making your task very clear! +Since an agent is powered by an LLM, minor variations in your task formulation might yield completely different results. + +Then, improve the information flow towards your agent in tool use. 
+ +Particular guidelines to follow: +- Each tool should log (by simply using `print` statements inside the tool's `forward` method) everything that could be useful for the LLM engine. + - In particular, logging detail on tool execution errors would help a lot! + +For instance, here's a tool that retrieves weather data based on location and date-time: + +First, here's a poor version: +```python +import datetime +from smolagents import tool + +def get_weather_report_at_coordinates(coordinates, date_time): + # Dummy function, returns a list of [temperature in °C, risk of rain on a scale 0-1, wave height in m] + return [28.0, 0.35, 0.85] + +def get_coordinates_from_location(location): + # Returns dummy coordinates + return [3.3, -42.0] + +@tool +def get_weather_api(location: str, date_time: str) -> str: + """ + Returns the weather report. + + Args: + location: the name of the place that you want the weather for. + date_time: the date and time for which you want the report. + """ + lon, lat = convert_location_to_coordinates(location) + date_time = datetime.strptime(date_time) + return str(get_weather_report_at_coordinates((lon, lat), date_time)) +``` + +Why is it bad? +- there's no precision of the format that should be used for `date_time` +- there's no detail on how location should be specified. +- there's no logging mechanism tying to explicit failure cases like location not being in a proper format, or date_time not being properly formatted. +- the output format is hard to understand + +If the tool call fails, the error trace logged in memory can help the LLM reverse engineer the tool to fix the errors. But why leave it with so much heavy lifting to do? + +A better way to build this tool would have been the following: +```python +@tool +def get_weather_api(location: str, date_time: str) -> str: + """ + Returns the weather report. + + Args: + location: the name of the place that you want the weather for. 
Should be a place name, followed by possibly a city name, then a country, like "Anchor Point, Taghazout, Morocco". + date_time: the date and time for which you want the report, formatted as '%m/%d/%y %H:%M:%S'. + """ + lon, lat = convert_location_to_coordinates(location) + try: + date_time = datetime.strptime(date_time) + except Exception as e: + raise ValueError("Conversion of `date_time` to datetime format failed, make sure to provide a string in format '%m/%d/%y %H:%M:%S'. Full trace:" + str(e)) + temperature_celsius, risk_of_rain, wave_height = get_weather_report_at_coordinates((lon, lat), date_time) + return f"Weather report for {location}, {date_time}: Temperature will be {temperature_celsius}°C, risk of rain is {risk_of_rain*100:.0f}%, wave height is {wave_height}m." +``` + +In general, to ease the load on your LLM, the good question to ask yourself is: "How easy would it be for me, if I was dumb and using this tool for the first time ever, to program with this tool and correct my own errors?". + +### Give more arguments to the agent + +To pass some additional objects to your agent beyond the simple string describing the task, you can use the `additional_args` argument to pass any type of object: + +```py +from smolagents import CodeAgent, HfApiModel + +model_id = "meta-llama/Llama-3.3-70B-Instruct" + +agent = CodeAgent(tools=[], model=HfApiModel(model_id=model_id), add_base_tools=True) + +agent.run( + "Why does Mike not know many people in New York?", + additional_args={"mp3_sound_file_url":'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3'} +) +``` +For instance, you can use this `additional_args` argument to pass images or strings that you want your agent to leverage. + + + +## How to debug your agent + +### 1. Use a stronger LLM + +In an agentic workflows, some of the errors are actual errors, some other are the fault of your LLM engine not reasoning properly. 
+For instance, consider this trace for an `CodeAgent` that I asked to create a car picture: +``` +==================================================================================================== New task ==================================================================================================== +Make me a cool car picture +──────────────────────────────────────────────────────────────────────────────────────────────────── New step ──────────────────────────────────────────────────────────────────────────────────────────────────── +Agent is executing the code below: ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +image_generator(prompt="A cool, futuristic sports car with LED headlights, aerodynamic design, and vibrant color, high-res, photorealistic") +────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + +Last output from code snippet: ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png +Step 1: + +- Time taken: 16.35 seconds +- Input tokens: 1,383 +- Output tokens: 77 +──────────────────────────────────────────────────────────────────────────────────────────────────── New step ──────────────────────────────────────────────────────────────────────────────────────────────────── +Agent is executing the code below: ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── 
+final_answer("/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png") +────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Print outputs: + +Last output from code snippet: ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png +Final answer: +/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png +``` +The user sees, instead of an image being returned, a path being returned to them. +It could look like a bug from the system, but actually the agentic system didn't cause the error: it's just that the LLM brain did the mistake of not saving the image output into a variable. +Thus it cannot access the image again except by leveraging the path that was logged while saving the image, so it returns the path instead of an image. + +The first step to debugging your agent is thus "Use a more powerful LLM". Alternatives like `Qwen2/5-72B-Instruct` wouldn't have made that mistake. + +### 2. Provide more guidance / more information + +You can also use less powerful models, provided you guide them more effectively. + +Put yourself in the shoes of your model: if you were the model solving the task, would you struggle with the information available to you (from the system prompt + task formulation + tool description) ? + +Would you need some added clarifications? + +To provide extra information, we do not recommend to change the system prompt right away: the default system prompt has many adjustments that you do not want to mess up except if you understand the prompt very well. 
+Better ways to guide your LLM engine are: +- If it's about the task to solve: add all these details to the task. The task could be 100s of pages long. +- If it's about how to use tools: the description attribute of your tools. + + +### 3. Change the system prompt (generally not advised) + +If the clarifications above are not sufficient, you can change the system prompt. + +Let's see how it works. For example, let us check the default system prompt for the [`CodeAgent`] (the version below is shortened by skipping zero-shot examples). + +```python +print(agent.system_prompt_template) +``` +Here is what you get: +```text +You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can. +To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code. +To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences. + +At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use. +Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '<end_code>' sequence. +During each intermediate step, you can use 'print()' to save whatever important information you will then need. +These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step. +In the end you have to return a final answer using the `final_answer` tool. + +Here are a few examples using notional tools: +--- +{examples} + +Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools: + +{{tool_descriptions}} + +{{managed_agents_descriptions}} + +Here are the rules you should always follow to solve your task: +1. 
Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail. +2. Use only variables that you have defined! +3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wiki({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wiki(query="What is the place where James Bond lives?")'. +4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block. +5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters. +6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'. +7. Never create any notional variables in our code, as having these in your logs might derail you from the true variables. +8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}} +9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist. +10. Don't give up! You're in charge of solving the task, not providing directions to solve it. + +Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. +``` + +As you can see, there are placeholders like `"{{tool_descriptions}}"`: these will be used upon agent initialization to insert certain automatically generated descriptions of tools or managed agents. 
+ +So while you can overwrite this system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter, your new system prompt must contain the following placeholders: +- `"{{tool_descriptions}}"` to insert tool descriptions. +- `"{{managed_agents_descriptions}}"` to insert the description for managed agents if there are any. +- For `CodeAgent` only: `"{{authorized_imports}}"` to insert the list of authorized imports. + +Then you can change the system prompt as follows: + +```py +from smolagents.prompts import CODE_SYSTEM_PROMPT + +modified_system_prompt = CODE_SYSTEM_PROMPT + "\nHere you go!" # Change the system prompt here + +agent = CodeAgent( + tools=[], + model=HfApiModel(), + system_prompt=modified_system_prompt +) +``` + +This also works with the [`ToolCallingAgent`]. + + +### 4. Extra planning + +We provide a model for a supplementary planning step, that an agent can run regularly in-between normal action steps. In this step, there is no tool call, the LLM is simply asked to update a list of facts it knows and to reflect on what steps it should take next based on those facts. + +```py +from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool +from dotenv import load_dotenv + +load_dotenv() + +# Import tool from Hub +image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True) + +search_tool = DuckDuckGoSearchTool() + +agent = CodeAgent( + tools=[search_tool], + model=HfApiModel("Qwen/Qwen2.5-72B-Instruct"), + planning_interval=3 # This is where you activate planning! +) + +# Run it! +result = agent.run( + "How long would a cheetah at full speed take to run the length of Pont Alexandre III?", +) +``` diff --git a/docs/source/zh/tutorials/secure_code_execution.md b/docs/source/zh/tutorials/secure_code_execution.md new file mode 100644 index 000000000..d8a6109ae --- /dev/null +++ b/docs/source/zh/tutorials/secure_code_execution.md @@ -0,0 +1,82 @@ +<!--Copyright 2024 The HuggingFace Team. 
All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Secure code execution + +[[open-in-colab]] + +> [!TIP] +> If you're new to building agents, make sure to first read the [intro to agents](../conceptual_guides/intro_agents) and the [guided tour of smolagents](../guided_tour). + +### Code agents + +[Multiple](https://huggingface.co/papers/2402.01030) [research](https://huggingface.co/papers/2411.01747) [papers](https://huggingface.co/papers/2401.00812) have shown that having the LLM write its actions (the tool calls) in code is much better than the current standard format for tool calling, which is across the industry different shades of "writing actions as a JSON of tools names and arguments to use". + +Why is code better? Well, because we crafted our code languages specifically to be great at expressing actions performed by a computer. If JSON snippets was a better way, this package would have been written in JSON snippets and the devil would be laughing at us. + +Code is just a better way to express actions on a computer. It has better: +- **Composability:** could you nest JSON actions within each other, or define a set of JSON actions to re-use later, the same way you could just define a python function? 
+- **Object management:** how do you store the output of an action like `generate_image` in JSON? +- **Generality:** code is built to express simply anything you can have a computer do. +- **Representation in LLM training corpuses:** why not leverage this benediction of the sky that plenty of quality actions have already been included in LLM training corpuses? + +This is illustrated in the figure below, taken from [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030). + +<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/code_vs_json_actions.png"> + +This is why we put emphasis on proposing code agents, in this case python agents, which meant putting higher effort on building secure python interpreters. + +### Local python interpreter + +By default, the `CodeAgent` runs LLM-generated code in your environment. +This execution is not done by the vanilla Python interpreter: we've re-built a more secure `LocalPythonInterpreter` from the ground up. +This interpreter is designed for security by: + - Restricting the imports to a list explicitly passed by the user + - Capping the number of operations to prevent infinite loops and resource bloating. + - Will not perform any operation that's not pre-defined. + +We've used this on many use cases, without ever observing any damage to the environment. + +However this solution is not watertight: one could imagine occasions where LLMs fine-tuned for malignant actions could still hurt your environment. For instance if you've allowed an innocuous package like `Pillow` to process images, the LLM could generate thousands of saves of images to bloat your hard drive. +It's certainly not likely if you've chosen the LLM engine yourself, but it could happen. + +So if you want to be extra cautious, you can use the remote code execution option described below. 
+ +### E2B code executor + +For maximum security, you can use our integration with E2B to run code in a sandboxed environment. This is a remote execution service that runs your code in an isolated container, making it impossible for the code to affect your local environment. + +For this, you will need to setup your E2B account and set your `E2B_API_KEY` in your environment variables. Head to [E2B's quickstart documentation](https://e2b.dev/docs/quickstart) for more information. + +Then you can install it with `pip install e2b-code-interpreter python-dotenv`. + +Now you're set! + +To set the code executor to E2B, simply pass the flag `use_e2b_executor=True` when initializing your `CodeAgent`. +Note that you should add all the tool's dependencies in `additional_authorized_imports`, so that the executor installs them. + +```py +from smolagents import CodeAgent, VisitWebpageTool, HfApiModel +agent = CodeAgent( + tools = [VisitWebpageTool()], + model=HfApiModel(), + additional_authorized_imports=["requests", "markdownify"], + use_e2b_executor=True +) + +agent.run("What was Abraham Lincoln's preferred pet?") +``` + +E2B code execution is not compatible with multi-agents at the moment - because having an agent call in a code blob that should be executed remotely is a mess. But we're working on adding it! diff --git a/docs/source/zh/tutorials/tools.md b/docs/source/zh/tutorials/tools.md new file mode 100644 index 000000000..014cd3b6e --- /dev/null +++ b/docs/source/zh/tutorials/tools.md @@ -0,0 +1,222 @@ +<!--Copyright 2024 The HuggingFace Team. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with +the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be +rendered properly in your Markdown viewer. + +--> +# Tools + +[[open-in-colab]] + +Here, we're going to see advanced tool usage. + +> [!TIP] +> If you're new to building agents, make sure to first read the [intro to agents](../conceptual_guides/intro_agents) and the [guided tour of smolagents](../guided_tour). + +- [Tools](#tools) + - [What is a tool, and how to build one?](#what-is-a-tool-and-how-to-build-one) + - [Share your tool to the Hub](#share-your-tool-to-the-hub) + - [Import a Space as a tool](#import-a-space-as-a-tool) + - [Use LangChain tools](#use-langchain-tools) + - [Manage your agent's toolbox](#manage-your-agents-toolbox) + - [Use a collection of tools](#use-a-collection-of-tools) + +### What is a tool, and how to build one? + +A tool is mostly a function that an LLM can use in an agentic system. + +But to use it, the LLM will need to be given an API: name, tool description, input types and descriptions, output type. + +So it cannot be only a function. It should be a class. + +So at core, the tool is a class that wraps a function with metadata that helps the LLM understand how to use it. + +Here's how it looks: + +```python +from smolagents import Tool + +class HFModelDownloadsTool(Tool): + name = "model_download_counter" + description = """ + This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. 
+ It returns the name of the checkpoint.""" + inputs = { + "task": { + "type": "string", + "description": "the task category (such as text-classification, depth-estimation, etc)", + } + } + output_type = "string" + + def forward(self, task: str): + from huggingface_hub import list_models + + model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) + return model.id + +model_downloads_tool = HFModelDownloadsTool() +``` + +The custom tool subclasses [`Tool`] to inherit useful methods. The child class also defines: +- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name it `model_download_counter`. +- An attribute `description` is used to populate the agent's system prompt. +- An `inputs` attribute, which is a dictionary with keys `"type"` and `"description"`. It contains information that helps the Python interpreter make educated choices about the input. +- An `output_type` attribute, which specifies the output type. The types for both `inputs` and `output_type` should be [Pydantic formats](https://docs.pydantic.dev/latest/concepts/json_schema/#generating-json-schema), they can be either of these: [`~AUTHORIZED_TYPES`]. +- A `forward` method which contains the inference code to be executed. + +And that's all it needs to be used in an agent! + +There's another way to build a tool. In the [guided_tour](../guided_tour), we implemented a tool using the `@tool` decorator. The [`tool`] decorator is the recommended way to define simple tools, but sometimes you need more than this: using several methods in a class for more clarity, or using additional class attributes. + +In this case, you can build your tool by subclassing [`Tool`] as described above. + +### Share your tool to the Hub + +You can share your custom tool to the Hub by calling [`~Tool.push_to_hub`] on the tool. 
Make sure you've created a repository for it on the Hub and are using a token with write access. + +```python +model_downloads_tool.push_to_hub("{your_username}/hf-model-downloads", token="<YOUR_HUGGINGFACEHUB_API_TOKEN>") +``` + +For the push to Hub to work, your tool will need to respect some rules: +- All methods are self-contained, i.e. they only use variables that come from their args. +- As per the above point, **all imports should be defined directly within the tool's functions**, else you will get an error when trying to call [`~Tool.save`] or [`~Tool.push_to_hub`] with your custom tool. +- If you override the `__init__` method, you can give it no other argument than `self`. This is because arguments set during a specific tool instance's initialization are hard to track, which prevents sharing them properly to the hub. And anyway, the idea of making a specific class is that you can already set class attributes for anything you need to hard-code (just set `your_variable=(...)` directly under the `class YourTool(Tool):` line). And of course you can still create a class attribute anywhere in your code by assigning stuff to `self.your_variable`. + + +Once your tool is pushed to Hub, you can visualize it. [Here](https://huggingface.co/spaces/m-ric/hf-model-downloads) is the `model_downloads_tool` that I've pushed. It has a nice gradio interface. + +When diving into the tool files, you can find that all the tool's logic is under [tool.py](https://huggingface.co/spaces/m-ric/hf-model-downloads/blob/main/tool.py). That is where you can inspect a tool shared by someone else. + +Then you can load the tool with [`load_tool`] or create it with [`~Tool.from_hub`] and pass it to the `tools` parameter in your agent. +Since running tools means running custom code, you need to make sure you trust the repository, thus we require to pass `trust_remote_code=True` to load a tool from the Hub. 
+ +```python +from smolagents import load_tool, CodeAgent + +model_download_tool = load_tool( + "{your_username}/hf-model-downloads", + trust_remote_code=True +) +``` + +### Import a Space as a tool + +You can directly import a Space from the Hub as a tool using the [`Tool.from_space`] method! + +You only need to provide the id of the Space on the Hub, its name, and a description that will help your agent understand what the tool does. Under the hood, this will use the [`gradio-client`](https://pypi.org/project/gradio-client/) library to call the Space. + +For instance, let's import the [FLUX.1-schnell](https://huggingface.co/spaces/black-forest-labs/FLUX.1-schnell) Space from the Hub and use it to generate an image. + +```python +image_generation_tool = Tool.from_space( + "black-forest-labs/FLUX.1-schnell", + name="image_generator", + description="Generate an image from a prompt" +) + +image_generation_tool("A sunny beach") +``` +And voilà, here's your image! 🏖️ + +<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/sunny_beach.webp"> + +Then you can use this tool just like any other tool. For example, let's improve the prompt `a rabbit wearing a space suit` and generate an image of it. + +```python +from smolagents import CodeAgent, HfApiModel + +model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") +agent = CodeAgent(tools=[image_generation_tool], model=model) + +agent.run( + "Improve this prompt, then generate an image of it.", prompt='A rabbit wearing a space suit' +) +``` + +```text +=== Agent thoughts: +improved_prompt could be "A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background" + +Now that I have improved the prompt, I can use the image generator tool to generate an image based on this prompt. 
+>>> Agent is executing the code below: +image = image_generator(prompt="A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background") +final_answer(image) +``` + +<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit_spacesuit_flux.webp"> + +How cool is this? 🤩 + +### Use LangChain tools + +We love Langchain and think it has a very compelling suite of tools. +To import a tool from LangChain, use the `from_langchain()` method. + +Here is how you can use it to recreate the intro's search result using a LangChain web search tool. +This tool will need `pip install langchain google-search-results -q` to work properly. +```python +from langchain.agents import load_tools + +search_tool = Tool.from_langchain(load_tools(["serpapi"])[0]) + +agent = CodeAgent(tools=[search_tool], model=model) + +agent.run("How many more blocks (also denoted as layers) are in BERT base encoder compared to the encoder from the architecture proposed in Attention is All You Need?") +``` + +### Manage your agent's toolbox + +You can manage an agent's toolbox by adding or replacing a tool. + +Let's add the `model_download_tool` to an existing agent initialized with only the default toolbox. + +```python +from smolagents import HfApiModel + +model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") + +agent = CodeAgent(tools=[], model=model, add_base_tools=True) +agent.tools.append(model_download_tool) +``` +Now we can leverage the new tool: + +```python +agent.run( + "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub but reverse the letters?" +) +``` + + +> [!TIP] +> Beware of not adding too many tools to an agent: this can overwhelm weaker LLM engines. 
+ + +### Use a collection of tools + +You can leverage tool collections by using the ToolCollection object, with the slug of the collection you want to use. +Then pass them as a list to initialize your agent, and start using them! + +```py +from smolagents import ToolCollection, CodeAgent + +image_tool_collection = ToolCollection( + collection_slug="huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f", + token="<YOUR_HUGGINGFACEHUB_API_TOKEN>" +) +agent = CodeAgent(tools=[*image_tool_collection.tools], model=model, add_base_tools=True) + +agent.run("Please draw me a picture of rivers and lakes.") +``` + +To speed up the start, tools are loaded only if called by the agent. From 509a00cea9ad3ed1e03ac2abac2616ad8d49a610 Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 13:31:47 +0800 Subject: [PATCH 02/10] Translate `index.md` --- docs/source/zh/_toctree.yml | 2 +- docs/source/zh/index.md | 31 +++++++++++++++++-------------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index bec73b788..507607e1f 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -1,4 +1,4 @@ -- title: Get started +- title: 起步 sections: - local: index title: 🤗 Agents diff --git a/docs/source/zh/index.md b/docs/source/zh/index.md index 7392cfc4a..6d6f1d65e 100644 --- a/docs/source/zh/index.md +++ b/docs/source/zh/index.md @@ -15,35 +15,38 @@ rendered properly in your Markdown viewer. # `smolagents` -This library is the simplest framework out there to build powerful agents! By the way, wtf are "agents"? We provide our definition [in this page](conceptual_guides/intro_agents), whe're you'll also find tips for when to use them or not (spoilers: you'll often be better off without agents). 
+这是构建强大agent的最简单框架!顺便问一下,什么是"agent"?我们在[此页面](conceptual_guides/intro_agents)提供了我们的定义,您还可以找到关于何时使用或不使用它们的建议(剧透:通常不使用agent会更好)。 -This library offers: +> [!TIP] +> 译者注:Agent的业内术语是“智能体”。本译文将保留agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) -✨ **Simplicity**: the logic for agents fits in ~thousand lines of code. We kept abstractions to their minimal shape above raw code! +本库提供: -🌐 **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through our inference API, but also models from OpenAI, Anthropic... it's really easy to power an agent with any LLM. +✨ **简洁性**:Agent逻辑仅需约千行代码。我们将抽象保持在原始代码之上的最小形态! -🧑💻 **First-class support for Code Agents**, i.e. agents that write their actions in code (as opposed to "agents being used to write code"), [read more here](tutorials/secure_code_execution). +🌐 **支持任何 LLM**:支持通过 Hub 托管的模型,使用其 `transformers` 版本或通过我们的推理 API 加载,也支持 OpenAI、Anthropic 等模型。使用任何 LLM 为agent提供动力都非常容易。 -🤗 **Hub integrations**: you can share and load tools to/from the Hub, and more is to come! +🧑💻 **一流的代码agent支持**,即编写代码作为其操作的agent(与"用于编写代码的agent"相对),[在此了解更多](tutorials/secure_code_execution)。 + +🤗 **Hub 集成**:您可以在 Hub 上共享和加载工具,更多功能即将推出! <div class="mt-10"> <div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5"> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./guided_tour" - ><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Guided tour</div> - <p class="text-gray-700">Learn the basics and become familiar with using Agents. 
Start here if you are using Agents for the first time!</p> + ><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">导览</div> + <p class="text-gray-700">学习基础知识并熟悉使用agent。如果您是第一次使用agent,请从这里开始!</p> </a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./examples/text_to_sql" - ><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">How-to guides</div> - <p class="text-gray-700">Practical guides to help you achieve a specific goal: create an agent to generate and test SQL queries!</p> + ><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">操作指南</div> + <p class="text-gray-700">实用指南,帮助您实现特定目标:创建一个生成和测试 SQL 查询的agent!</p> </a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./conceptual_guides/intro_agents" - ><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Conceptual guides</div> - <p class="text-gray-700">High-level explanations for building a better understanding of important topics.</p> + ><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">概念指南</div> + <p class="text-gray-700">高级解释,帮助您更好地理解重要主题。</p> </a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./tutorials/building_good_agents" - ><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Tutorials</div> - <p class="text-gray-700">Horizontal tutorials that cover important aspects of building 
agents.</p> + ><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">教程</div> + <p class="text-gray-700">涵盖构建agent重要方面的横向教程。</p> </a> </div> </div> From 33393f2065ca5bfb710e147cd916642a7ce3f8ad Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 13:59:07 +0800 Subject: [PATCH 03/10] Translate `guided_tour.md` --- docs/source/zh/_toctree.yml | 2 +- docs/source/zh/guided_tour.md | 195 +++++++++++++++++----------------- 2 files changed, 100 insertions(+), 97 deletions(-) diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index 507607e1f..fed047e58 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -3,7 +3,7 @@ - local: index title: 🤗 Agents - local: guided_tour - title: Guided tour + title: 导览 - title: Tutorials sections: - local: tutorials/building_good_agents diff --git a/docs/source/zh/guided_tour.md b/docs/source/zh/guided_tour.md index 4e2fe44d4..d16786596 100644 --- a/docs/source/zh/guided_tour.md +++ b/docs/source/zh/guided_tour.md @@ -13,31 +13,34 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -# Agents - Guided tour +# Agents - 导览 [[open-in-colab]] -In this guided visit, you will learn how to build an agent, how to run it, and how to customize it to make it work better for your use-case. +在本导览中,您将学习如何构建一个agent(智能体),如何运行它,以及如何自定义它以使其更好地适应您的使用场景。 -### Building your agent +> [!TIP] +> 译者注:Agent的业内术语是“智能体”。本译文将保留agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) + +### 构建您的agent -To initialize a minimal agent, you need at least these two arguments: +要初始化一个最小化的agent,您至少需要以下两个参数: -- `model`, a text-generation model to power your agent - because the agent is different from a simple LLM, it is a system that uses a LLM as its engine. 
You can use any of these options: - - [`TransformersModel`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`. - - [`HfApiModel`] leverages a `huggingface_hub.InferenceClient` under the hood. - - [`LiteLLMModel`] lets you call 100+ different models through [LiteLLM](https://docs.litellm.ai/)! +- `model`,一个为您的agent提供动力的文本生成模型 - 因为agent与简单的LLM不同,它是一个使用LLM作为引擎的系统。您可以使用以下任一选项: + - [`TransformersModel`] 使用预初始化的`transformers`管道在本地机器上运行推理 + - [`HfApiModel`] 在底层使用`huggingface_hub.InferenceClient` + - [`LiteLLMModel`] 让您通过[LiteLLM](https://docs.litellm.ai/)调用100+不同的模型! -- `tools`, A list of `Tools` that the agent can use to solve the task. It can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`. +- `tools`,agent可以用来解决任务的`Tools`列表。它可以是一个空列表。您还可以通过定义可选参数`add_base_tools=True`在您的`tools`列表之上添加默认工具箱。 -Once you have these two arguments, `tools` and `model`, you can create an agent and run it. You can use any LLM you'd like, either through [Hugging Face API](https://huggingface.co/docs/api-inference/en/index), [transformers](https://github.com/huggingface/transformers/), [ollama](https://ollama.com/), or [LiteLLM](https://www.litellm.ai/). +一旦有了这两个参数`tools`和`model`,您就可以创建一个agent并运行它。您可以使用任何您喜欢的LLM,无论是通过[Hugging Face API](https://huggingface.co/docs/api-inference/en/index)、[transformers](https://github.com/huggingface/transformers/)、[ollama](https://ollama.com/),还是[LiteLLM](https://www.litellm.ai/)。 -<hfoptions id="Pick a LLM"> +<hfoptions id="选择一个LLM"> <hfoption id="Hugging Face API"> -Hugging Face API is free to use without a token, but then it will have a rate limitation. +Hugging Face API可以免费使用而无需token,但会有速率限制。 -To access gated models or rise your rate limits with a PRO account, you need to set the environment variable `HF_TOKEN` or pass `token` variable upon initialization of `HfApiModel`. 
+要访问受限模型或使用PRO账户提高速率限制,您需要设置环境变量`HF_TOKEN`或在初始化`HfApiModel`时传递`token`变量。 ```python from smolagents import CodeAgent, HfApiModel @@ -52,7 +55,7 @@ agent.run( ) ``` </hfoption> -<hfoption id="Local Transformers Model"> +<hfoption id="本地Transformers模型"> ```python from smolagents import CodeAgent, TransformersModel @@ -67,14 +70,14 @@ agent.run( ) ``` </hfoption> -<hfoption id="OpenAI or Anthropic API"> +<hfoption id="OpenAI或Anthropic API"> -To use `LiteLLMModel`, you need to set the environment variable `ANTHROPIC_API_KEY` or `OPENAI_API_KEY`, or pass `api_key` variable upon initialization. +要使用`LiteLLMModel`,您需要设置环境变量`ANTHROPIC_API_KEY`或`OPENAI_API_KEY`,或者在初始化时传递`api_key`变量。 ```python from smolagents import CodeAgent, LiteLLMModel -model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", api_key="YOUR_ANTHROPIC_API_KEY") # Could use 'gpt-4o' +model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", api_key="YOUR_ANTHROPIC_API_KEY") # 也可以使用'gpt-4o' agent = CodeAgent(tools=[], model=model, add_base_tools=True) agent.run( @@ -88,9 +91,9 @@ agent.run( from smolagents import CodeAgent, LiteLLMModel model = LiteLLMModel( - model_id="ollama_chat/llama3.2", # This model is a bit weak for agentic behaviours though - api_base="http://localhost:11434", # replace with remote open-ai compatible server if necessary - api_key="YOUR_API_KEY" # replace with API key if necessary + model_id="ollama_chat/llama3.2", # 这个模型对于agent行为来说有点弱 + api_base="http://localhost:11434", # 如果需要可以替换为远程open-ai兼容服务器 + api_key="YOUR_API_KEY" # 如果需要可以替换为API key ) agent = CodeAgent(tools=[], model=model, add_base_tools=True) @@ -102,15 +105,15 @@ agent.run( </hfoption> </hfoptions> -#### CodeAgent and ToolCallingAgent +#### CodeAgent和ToolCallingAgent -The [`CodeAgent`] is our default agent. It will write and execute python code snippets at each step. +[`CodeAgent`]是我们的默认agent。它将在每一步编写并执行Python代码片段。 -By default, the execution is done in your local environment. 
-This should be safe because the only functions that can be called are the tools you provided (especially if it's only tools by Hugging Face) and a set of predefined safe functions like `print` or functions from the `math` module, so you're already limited in what can be executed. +默认情况下,执行是在您的本地环境中完成的。 +这应该是安全的,因为唯一可以调用的函数是您提供的工具(特别是如果只有Hugging Face的工具)和一组预定义的安全函数,如`print`或`math`模块中的函数,所以您已经限制了可以执行的内容。 -The Python interpreter also doesn't allow imports by default outside of a safe list, so all the most obvious attacks shouldn't be an issue. -You can authorize additional imports by passing the authorized modules as a list of strings in argument `additional_authorized_imports` upon initialization of your [`CodeAgent`]: +Python解释器默认也不允许在安全列表之外导入,所以所有最明显的攻击都不应该成为问题。 +您可以通过在初始化[`CodeAgent`]时将授权模块作为字符串列表传递给参数`additional_authorized_imports`来授权额外的导入: ```py from smolagents import CodeAgent @@ -120,16 +123,16 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co ``` > [!WARNING] -> The LLM can generate arbitrary code that will then be executed: do not add any unsafe imports! +> LLM可以生成任意代码然后执行:不要添加任何不安全的导入! -The execution will stop at any code trying to perform an illegal operation or if there is a regular Python error with the code generated by the agent. +如果生成的代码尝试执行非法操作或出现常规Python错误,执行将停止。 -You can also use [E2B code executor](https://e2b.dev/docs#what-is-e2-b) instead of a local Python interpreter by first [setting the `E2B_API_KEY` environment variable](https://e2b.dev/dashboard?tab=keys) and then passing `use_e2b_executor=True` upon agent initialization. +您也可以使用[E2B代码执行器](https://e2b.dev/docs#what-is-e2-b)而不是本地Python解释器,首先[设置`E2B_API_KEY`环境变量](https://e2b.dev/dashboard?tab=keys),然后在初始化agent时传递`use_e2b_executor=True`。 > [!TIP] -> Learn more about code execution [in this tutorial](tutorials/secure_code_execution). 
+> 在[该教程中](tutorials/secure_code_execution)了解更多关于代码执行的内容。 -We also support the widely-used way of writing actions as JSON-like blobs: this is [`ToolCallingAgent`], it works much in the same way like [`CodeAgent`], of course without `additional_authorized_imports` since it doesn't execute code: +我们还支持广泛使用的将动作编写为JSON-like块的方式:[`ToolCallingAgent`],它的工作方式与[`CodeAgent`]非常相似,当然没有`additional_authorized_imports`,因为它不执行代码: ```py from smolagents import ToolCallingAgent @@ -138,33 +141,33 @@ agent = ToolCallingAgent(tools=[], model=model) agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") ``` -### Inspecting an agent run +### 检查agent运行 -Here are a few useful attributes to inspect what happened after a run: -- `agent.logs` stores the fine-grained logs of the agent. At every step of the agent's run, everything gets stored in a dictionary that then is appended to `agent.logs`. -- Running `agent.write_inner_memory_from_logs()` creates an inner memory of the agent's logs for the LLM to view, as a list of chat messages. This method goes over each step of the log and only stores what it's interested in as a message: for instance, it will save the system prompt and task in separate messages, then for each step it will store the LLM output as a message, and the tool call output as another message. Use this if you want a higher-level view of what has happened - but not every log will be transcripted by this method. +以下是一些有用的属性,用于检查运行后发生了什么: +- `agent.logs`存储agent的细粒度日志。在agent运行的每一步,所有内容都会存储在一个字典中,然后附加到`agent.logs`中。 +- 运行`agent.write_inner_memory_from_logs()`会为LLM创建一个agent日志的内部内存,作为聊天消息列表。此方法会遍历日志的每一步,并仅存储它感兴趣的内容作为消息:例如,它会将系统提示和任务存储为单独的消息,然后对于每一步,它会将LLM输出存储为一条消息,工具调用输出存储为另一条消息。如果您想从更高层次查看发生了什么,请使用此方法 - 但并非所有日志都会被此方法转录。 -## Tools +## 工具 -A tool is an atomic function to be used by an agent.
To be used by an LLM, it also needs a few attributes that constitute its API and will be used to describe to the LLM how to call this tool: -- A name -- A description -- Input types and descriptions -- An output type +工具是agent使用的原子函数。为了被LLM使用,它还需要一些构成其API的属性,这些属性将用于向LLM描述如何调用此工具: +- 名称 +- 描述 +- 输入类型和描述 +- 输出类型 -You can for instance check the [`PythonInterpreterTool`]: it has a name, a description, input descriptions, an output type, and a `forward` method to perform the action. +例如,您可以查看[`PythonInterpreterTool`]:它有一个名称、描述、输入描述、输出类型和一个执行操作的`forward`方法。 -When the agent is initialized, the tool attributes are used to generate a tool description which is baked into the agent's system prompt. This lets the agent know which tools it can use and why. +当agent初始化时,工具属性用于生成工具描述,该描述被嵌入到agent的系统提示中。这让agent知道它可以使用哪些工具以及为什么。 -### Default toolbox +### 默认工具箱 -Transformers comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools = True`: +`smolagents`附带了一个用于增强agent的默认工具箱,您可以在初始化时通过参数`add_base_tools = True`将其添加到您的agent中: -- **DuckDuckGo web search***: performs a web search using DuckDuckGo browser. -- **Python code interpreter**: runs your the LLM generated Python code in a secure environment. This tool will only be added to [`ToolCallingAgent`] if you initialize it with `add_base_tools=True`, since code-based agent can already natively execute Python code -- **Transcriber**: a speech-to-text pipeline built on Whisper-Turbo that transcribes an audio to text. +- **DuckDuckGo网页搜索**:使用DuckDuckGo浏览器执行网页搜索。 +- **Python代码解释器**:在安全环境中运行LLM生成的Python代码。只有在使用`add_base_tools=True`初始化[`ToolCallingAgent`]时才会添加此工具,因为基于代码的agent已经可以原生执行Python代码。 +- **转录器**:基于Whisper-Turbo构建的语音转文本管道,将音频转录为文本。 -You can manually use a tool by calling the [`load_tool`] function and a task to perform.
+您可以通过调用[`load_tool`]函数和要执行的任务手动使用工具。 ```python from smolagents import load_tool @@ -173,12 +176,12 @@ search_tool = load_tool("web_search") print(search_tool("Who's the current president of Russia?")) ``` -### Create a new tool +### 创建一个新工具 -You can create your own tool for use cases not covered by the default tools from Hugging Face. -For example, let's create a tool that returns the most downloaded model for a given task from the Hub. +您可以创建自己的工具,用于Hugging Face默认工具未涵盖的用例。 +例如,让我们创建一个工具,返回Hub上给定任务下载量最多的模型。 -You'll start with the code below. +您将从以下代码开始。 ```python from huggingface_hub import list_models @@ -189,13 +192,13 @@ most_downloaded_model = next(iter(list_models(filter=task, sort="downloads", dir print(most_downloaded_model.id) ``` -This code can quickly be converted into a tool, just by wrapping it in a function and adding the `tool` decorator: -This is not the only way to build the tool: you can directly define it as a subclass of [`Tool`], which gives you more flexibility, for instance the possibility to initialize heavy class attributes. +这段代码可以通过将其包装在一个函数中并添加`tool`装饰器快速转换为工具: +这不是构建工具的唯一方法:您可以直接将其定义为[`Tool`]的子类,这为您提供了更多的灵活性,例如初始化重型类属性的可能性。 -Let's see how it works for both options: +让我们看看这两种选项的工作原理: -<hfoptions id="build-a-tool"> -<hfoption id="Decorate a function with @tool"> +<hfoptions id="构建工具"> +<hfoption id="使用@tool装饰一个函数"> ```py from smolagents import tool @@ -213,16 +216,16 @@ def model_download_tool(task: str) -> str: return most_downloaded_model.id ``` -The function needs: -- A clear name. The name should be descriptive enough of what this tool does to help the LLM brain powering the agent. Since this tool returns the model with the most downloads for a task, let's name it `model_download_tool`. -- Type hints on both inputs and output -- A description, that includes an 'Args:' part where each argument is described (without a type indication this time, it will be pulled from the type hint). 
Same as for the tool name, this description is an instruction manual for the LLM powering you agent, so do not neglect it. -All these elements will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible! +该函数需要: +- 一个清晰的名称。名称应该足够描述此工具的功能,以帮助为agent提供动力的LLM。由于此工具返回任务下载量最多的模型,我们将其命名为`model_download_tool`。 +- 输入和输出的类型提示 +- 一个描述,其中包括一个'Args:'部分,其中每个参数都被描述(这次没有类型指示,它将从类型提示中提取)。与工具名称一样,此描述是为您的agent提供动力的LLM的说明书,所以不要忽视它。 +所有这些元素将在初始化时自动嵌入到agent的系统提示中:因此要努力使它们尽可能清晰! > [!TIP] -> This definition format is the same as tool schemas used in `apply_chat_template`, the only difference is the added `tool` decorator: read more on our tool use API [here](https://huggingface.co/blog/unified-tool-use#passing-tools-to-a-chat-template). +> 此定义格式与`apply_chat_template`中使用的工具模式相同,唯一的区别是添加了`tool`装饰器:[这里](https://huggingface.co/blog/unified-tool-use#passing-tools-to-a-chat-template)了解更多关于我们的工具使用API。 </hfoption> -<hfoption id="Subclass Tool"> +<hfoption id="子类化Tool"> ```py from smolagents import Tool @@ -238,17 +241,17 @@ class ModelDownloadTool(Tool): return most_downloaded_model.id ``` -The subclass needs the following attributes: -- A clear `name`. The name should be descriptive enough of what this tool does to help the LLM brain powering the agent. Since this tool returns the model with the most downloads for a task, let's name it `model_download_tool`. -- A `description`. Same as for the `name`, this description is an instruction manual for the LLM powering you agent, so do not neglect it. -- Input types and descriptions -- Output type -All these attributes will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible! +子类需要以下属性: +- 一个清晰的`name`。名称应该足够描述此工具的功能,以帮助为agent提供动力的LLM。由于此工具返回任务下载量最多的模型,我们将其命名为`model_download_tool`。 +- 一个`description`。与`name`一样,此描述是为您的agent提供动力的LLM的说明书,所以不要忽视它。 +- 输入类型和描述 +- 输出类型 +所有这些属性将在初始化时自动嵌入到agent的系统提示中:因此要努力使它们尽可能清晰! 
</hfoption> </hfoptions> -Then you can directly initialize your agent: +然后您可以直接初始化您的agent: ```py from smolagents import CodeAgent, HfApiModel agent = CodeAgent(tools=[model_download_tool], model=HfApiModel()) @@ -257,7 +260,7 @@ agent.run( ) ``` -You get the following logs: +您将获得以下日志: ```text ╭──────────────────────────────────────── New run ─────────────────────────────────────────╮ │ │ @@ -265,7 +268,7 @@ You get the following logs: │ task on the Hugging Face Hub? │ │ │ ╰─ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct ───────────────────────────────────────────╯ -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 0 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 0 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮ │ 1 model_name = model_download_tool(task="text-to-video") │ │ 2 print(model_name) │ @@ -275,7 +278,7 @@ ByteDance/AnimateDiff-Lightning Out: None [Step 0: Duration 0.27 seconds| Input tokens: 2,069 | Output tokens: 60] -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮ │ 1 final_answer("ByteDance/AnimateDiff-Lightning") │ ╰──────────────────────────────────────────────────────────────────────────────────────────╯ @@ -285,20 +288,20 @@ Out[20]: 'ByteDance/AnimateDiff-Lightning' ``` > [!TIP] -> Read more on tools in the [dedicated tutorial](./tutorials/tools#what-is-a-tool-and-how-to-build-one). +> 在[专用教程](./tutorials/tools#what-is-a-tool-and-how-to-build-one)中了解更多关于工具的内容。 -## Multi-agents +## 多agent -Multi-agent systems have been introduced with Microsoft's framework [Autogen](https://huggingface.co/papers/2308.08155). 
+多agent系统是随着微软的框架[Autogen](https://huggingface.co/papers/2308.08155)引入的。 -In this type of framework, you have several agents working together to solve your task instead of only one. -It empirically yields better performance on most benchmarks. The reason for this better performance is conceptually simple: for many tasks, rather than using a do-it-all system, you would prefer to specialize units on sub-tasks. Here, having agents with separate tool sets and memories allows to achieve efficient specialization. For instance, why fill the memory of the code generating agent with all the content of webpages visited by the web search agent? It's better to keep them separate. +在这种类型的框架中,您有多个agent一起工作来解决您的任务,而不是只有一个。 +经验表明,这在大多数基准测试中表现更好。这种更好表现的原因在概念上很简单:对于许多任务,与其使用一个全能系统,您更愿意将单元专门用于子任务。在这里,拥有具有单独工具集和内存的agent可以实现高效的专业化。例如,为什么要用网页搜索agent访问的所有网页内容填充代码生成agent的内存?最好将它们分开。 -You can easily build hierarchical multi-agent systems with `smolagents`. +您可以使用`smolagents`轻松构建分层多agent系统。 -To do so, encapsulate the agent in a [`ManagedAgent`] object. This object needs arguments `agent`, `name`, and a `description`, which will then be embedded in the manager agent's system prompt to let it know how to call this managed agent, as we also do for tools. +为此,将agent封装在[`ManagedAgent`]对象中。此对象需要参数`agent`、`name`和`description`,这些参数将嵌入到管理agent的系统提示中,以让它知道如何调用此托管agent,就像我们对工具所做的那样。 -Here's an example of making an agent that managed a specific web search agent using our [`DuckDuckGoSearchTool`]: +以下是一个使用我们的[`DuckDuckGoSearchTool`]制作一个管理特定网页搜索agent的agent的示例: ```py from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, ManagedAgent @@ -321,12 +324,12 @@ manager_agent.run("Who is the CEO of Hugging Face?") ``` > [!TIP] -> For an in-depth example of an efficient multi-agent implementation, see [how we pushed our multi-agent system to the top of the GAIA leaderboard](https://huggingface.co/blog/beating-gaia). 
+> 有关高效多agent实现的深入示例,请参阅[我们如何将多agent系统推向GAIA排行榜的顶部](https://huggingface.co/blog/beating-gaia)。 -## Talk with your agent and visualize its thoughts in a cool Gradio interface +## 与您的agent交谈并在酷炫的Gradio界面中可视化其思考过程 -You can use `GradioUI` to interactively submit tasks to your agent and observe its thought and execution process, here is an example: +您可以使用`GradioUI`交互式地向您的agent提交任务并观察其思考和执行过程,以下是一个示例: ```py from smolagents import ( @@ -336,25 +339,25 @@ from smolagents import ( GradioUI ) -# Import tool from Hub +# 从Hub导入工具 image_generation_tool = load_tool("m-ric/text-to-image") model = HfApiModel(model_id) -# Initialize the agent with the image generation tool +# 使用图像生成工具初始化agent agent = CodeAgent(tools=[image_generation_tool], model=model) GradioUI(agent).launch() ``` -Under the hood, when the user types a new answer, the agent is launched with `agent.run(user_request, reset=False)`. -The `reset=False` flag means the agent's memory is not flushed before launching this new task, which lets the conversation go on. +在底层,当用户输入新答案时,agent会以`agent.run(user_request, reset=False)`启动。 +`reset=False`标志意味着在启动此新任务之前不会刷新agent的内存,这使得对话可以继续。 -You can also use this `reset=False` argument to keep the conversation going in any other agentic application. +您也可以在其他agent化应用程序中使用此`reset=False`参数来保持对话继续。 -## Next steps +## 下一步 -For more in-depth usage, you will then want to check out our tutorials: -- [the explanation of how our code agents work](./tutorials/secure_code_execution) -- [this guide on how to build good agents](./tutorials/building_good_agents). -- [the in-depth guide for tool usage](./tutorials/building_good_agents). 
+要更深入地使用,您将需要查看我们的教程: +- [我们的代码agent如何工作的解释](./tutorials/secure_code_execution) +- [本指南关于如何构建好的agent](./tutorials/building_good_agents)。 +- [工具使用的深入指南](./tutorials/tools)。 From c51feb89abef3bb1a99e3a87bf4d0a1001c2e161 Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 14:21:35 +0800 Subject: [PATCH 04/10] Translate `tutorials/building_good_agents.md` --- docs/source/zh/_toctree.yml | 2 +- .../zh/tutorials/building_good_agents.md | 149 +++++++++--------- 2 files changed, 75 insertions(+), 76 deletions(-) diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index fed047e58..c55e27e96 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -7,7 +7,7 @@ - title: Tutorials sections: - local: tutorials/building_good_agents - title: ✨ Building good agents + title: ✨ 构建好用的agents - local: tutorials/tools title: 🛠️ Tools - in-depth guide - local: tutorials/secure_code_execution diff --git a/docs/source/zh/tutorials/building_good_agents.md b/docs/source/zh/tutorials/building_good_agents.md index de8cd3ad8..336f873e3 100644 --- a/docs/source/zh/tutorials/building_good_agents.md +++ b/docs/source/zh/tutorials/building_good_agents.md @@ -13,62 +13,62 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -# Building good agents +# 构建好用的agent [[open-in-colab]] -There's a world of difference between building an agent that works and one that doesn't. -How can we build agents that fall into the latter category? -In this guide, we're going to see best practices for building agents. +能良好工作的agent和不能工作的agent之间,有天壤之别。 +我们怎么样才能构建出属于前者的agent呢? +在本指南中,我们将看到构建agent的最佳实践。 > [!TIP] -> If you're new to building agents, make sure to first read the [intro to agents](../conceptual_guides/intro_agents) and the [guided tour of smolagents](../guided_tour). 
+> 如果你是agent构建的新手,请确保首先阅读[agent介绍](../conceptual_guides/intro_agents)和[smolagents导览](../guided_tour)。 -### The best agentic systems are the simplest: simplify the workflow as much as you can +### 最好的agent系统是最简单的:尽可能简化工作流 -Giving an LLM some agency in your workflow introduces some risk of errors. +在你的工作流中赋予LLM一些自主权,会引入一些错误风险。 -Well-programmed agentic systems have good error logging and retry mechanisms anyway, so the LLM engine has a chance to self-correct their mistake. But to reduce the risk of LLM error to the maximum, you should simplify your workflow! +经过良好编程的agent系统,通常具有良好的错误日志记录和重试机制,因此LLM引擎有机会自我纠错。但为了最大限度地降低LLM错误的风险,你应该简化你的工作流! -Let's revisit the example from [intro_agents]: a bot that answers user queries for a surf trip company. -Instead of letting the agent do 2 different calls for "travel distance API" and "weather API" each time they are asked about a new surf spot, you could just make one unified tool "return_spot_information", a function that calls both APIs at once and returns their concatenated outputs to the user. +让我们回顾一下[agent介绍](../conceptual_guides/intro_agents)中的例子:一个为冲浪旅行公司回答用户咨询的机器人。 +与其让agent每次被问及新的冲浪地点时,都分别调用"旅行距离API"和"天气API",你可以只创建一个统一的工具"return_spot_information",一个同时调用这两个API,并返回它们连接输出的函数。 -This will reduce costs, latency, and error risk! +这可以降低成本、延迟和错误风险! -The main guideline is: Reduce the number of LLM calls as much as you can. +主要的指导原则是:尽可能减少LLM调用的次数。 -This leads to a few takeaways: -- Whenever possible, group 2 tools in one, like in our example of the two APIs. -- Whenever possible, logic should be based on deterministic functions rather than agentic decisions. +这可以带来一些启发: +- 尽可能把两个工具合并为一个,就像我们两个API的例子。 +- 尽可能基于确定性函数,而不是agent决策,来实现逻辑。 -### Improve the information flow to the LLM engine +### 改善流向LLM引擎的信息流 -Remember that your LLM engine is like a ~intelligent~ robot, tapped into a room with the only communication with the outside world being notes passed under a door. 
+记住,你的LLM引擎就像一个~智能~机器人,被关在一个房间里,与外界唯一的交流方式是通过门缝传递的纸条。 -It won't know of anything that happened if you don't explicitly put that into its prompt. +如果你没有明确地将信息放入其提示中,它将不知道发生的任何事情。 -So first start with making your task very clear! -Since an agent is powered by an LLM, minor variations in your task formulation might yield completely different results. +所以首先要让你的任务非常清晰! +由于agent由LLM驱动,任务表述的微小变化可能会产生完全不同的结果。 -Then, improve the information flow towards your agent in tool use. +然后,改善工具使用中流向agent的信息流。 -Particular guidelines to follow: -- Each tool should log (by simply using `print` statements inside the tool's `forward` method) everything that could be useful for the LLM engine. - - In particular, logging detail on tool execution errors would help a lot! +需要遵循的具体指南: +- 每个工具都应该记录(只需在工具的`forward`方法中使用`print`语句)对LLM引擎可能有用的所有信息。 + - 特别是,记录工具执行错误的详细信息会很有帮助! -For instance, here's a tool that retrieves weather data based on location and date-time: +例如,这里有一个根据位置和日期时间检索天气数据的工具: -First, here's a poor version: +首先,这是一个糟糕的版本: ```python import datetime from smolagents import tool def get_weather_report_at_coordinates(coordinates, date_time): - # Dummy function, returns a list of [temperature in °C, risk of rain on a scale 0-1, wave height in m] + # 虚拟函数,返回[温度(°C),降雨风险(0-1),浪高(m)] return [28.0, 0.35, 0.85] def get_coordinates_from_location(location): - # Returns dummy coordinates + # 返回虚拟坐标 return [3.3, -42.0] @tool @@ -85,15 +85,15 @@ def get_weather_api(location: str, date_time: str) -> str: return str(get_weather_report_at_coordinates((lon, lat), date_time)) ``` -Why is it bad? -- there's no precision of the format that should be used for `date_time` -- there's no detail on how location should be specified. -- there's no logging mechanism tying to explicit failure cases like location not being in a proper format, or date_time not being properly formatted. -- the output format is hard to understand +为什么它不好? 
+- 没有说明`date_time`应该使用的格式 +- 没有说明位置应该如何指定 +- 没有记录机制来处理明确的报错情况,如位置格式不正确或date_time格式不正确 +- 输出格式难以理解 -If the tool call fails, the error trace logged in memory can help the LLM reverse engineer the tool to fix the errors. But why leave it with so much heavy lifting to do? +如果工具调用失败,内存中记录的错误跟踪,可以帮助LLM逆向工程工具来修复错误。但为什么要让它做这么多繁重的工作呢? -A better way to build this tool would have been the following: +构建这个工具的更好方式如下: ```python @tool def get_weather_api(location: str, date_time: str) -> str: @@ -113,11 +113,11 @@ def get_weather_api(location: str, date_time: str) -> str: return f"Weather report for {location}, {date_time}: Temperature will be {temperature_celsius}°C, risk of rain is {risk_of_rain*100:.0f}%, wave height is {wave_height}m." ``` -In general, to ease the load on your LLM, the good question to ask yourself is: "How easy would it be for me, if I was dumb and using this tool for the first time ever, to program with this tool and correct my own errors?". +一般来说,为了减轻LLM的负担,要问自己的好问题是:"如果我是一个第一次使用这个工具的傻瓜,使用这个工具编程并纠正自己的错误有多容易?"。 -### Give more arguments to the agent +### 给agent更多参数 -To pass some additional objects to your agent beyond the simple string describing the task, you can use the `additional_args` argument to pass any type of object: +除了简单的任务描述字符串外,你还可以使用`additional_args`参数传递任何类型的对象: ```py from smolagents import CodeAgent, HfApiModel @@ -131,17 +131,16 @@ agent.run( additional_args={"mp3_sound_file_url":'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3'} ) ``` -For instance, you can use this `additional_args` argument to pass images or strings that you want your agent to leverage. +例如,你可以使用这个`additional_args`参数传递你希望agent利用的图像或字符串。 +## 如何调试你的agent -## How to debug your agent +### 1. 使用更强大的LLM -### 1. Use a stronger LLM - -In an agentic workflows, some of the errors are actual errors, some other are the fault of your LLM engine not reasoning properly. 
-For instance, consider this trace for an `CodeAgent` that I asked to create a car picture: -``` +在agent工作流中,有些错误是实际错误,有些则是你的LLM引擎没有正确推理的结果。 +例如,参考这个我要求创建一个汽车图片的`CodeAgent`的运行记录: +```text ==================================================================================================== New task ==================================================================================================== Make me a cool car picture ──────────────────────────────────────────────────────────────────────────────────────────────────── New step ──────────────────────────────────────────────────────────────────────────────────────────────────── @@ -167,36 +166,36 @@ Last output from code snippet: ──────────────── Final answer: /var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png ``` -The user sees, instead of an image being returned, a path being returned to them. -It could look like a bug from the system, but actually the agentic system didn't cause the error: it's just that the LLM brain did the mistake of not saving the image output into a variable. -Thus it cannot access the image again except by leveraging the path that was logged while saving the image, so it returns the path instead of an image. +用户看到的是返回了一个路径,而不是图像。 +这看起来像是系统的错误,但实际上agent系统并没有导致错误:只是LLM大脑犯了一个错误,没有把图像输出,保存到变量中。 +因此,它无法再次访问图像,只能利用保存图像时记录的路径,所以它返回的是路径,而不是图像。 -The first step to debugging your agent is thus "Use a more powerful LLM". Alternatives like `Qwen2/5-72B-Instruct` wouldn't have made that mistake. +调试agent的第一步是"使用更强大的LLM"。像`Qwen2.5-72B-Instruct`这样的替代方案不会犯这种错误。 -### 2. Provide more guidance / more information +### 2. 提供更多指导/更多信息 -You can also use less powerful models, provided you guide them more effectively. +你也可以使用不太强大的模型,只要你更有效地指导它们。 -Put yourself in the shoes of your model: if you were the model solving the task, would you struggle with the information available to you (from the system prompt + task formulation + tool description) ? 
+站在模型的角度思考:如果你是模型在解决任务,你会因为系统提示+任务表述+工具描述中提供的信息而挣扎吗? -Would you need some added clarifications? +你需要一些额外的说明吗? -To provide extra information, we do not recommend to change the system prompt right away: the default system prompt has many adjustments that you do not want to mess up except if you understand the prompt very well. -Better ways to guide your LLM engine are: -- If it 's about the task to solve: add all these details to the task. The task could be 100s of pages long. -- If it's about how to use tools: the description attribute of your tools. +为了提供额外信息,我们不建议立即更改系统提示:默认系统提示有许多调整,除非你非常了解提示,否则你很容易翻车。 +更好的指导LLM引擎的方法是: +- 如果是关于要解决的任务:把所有细节添加到任务中。任务可以有几百页长。 +- 如果是关于如何使用工具:你的工具的description属性。 -### 3. Change the system prompt (generally not advised) +### 3. 更改系统提示(通常不建议) -If above clarifications above are not sufficient, you can change the system prompt. +如果上述说明不够,你可以更改系统提示。 -Let's see how it works. For example, let us check the default system prompt for the [`CodeAgent`] (below version is shortened by skipping zero-shot examples). +让我们看看它是如何工作的。例如,让我们检查[`CodeAgent`]的默认系统提示(下面的版本通过跳过零样本示例进行了缩短)。 ```python print(agent.system_prompt_template) ``` -Here is what you get: +你会得到: ```text You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can. To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code. @@ -233,19 +232,19 @@ Here are the rules you should always follow to solve your task: Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. ``` -As you can see, there are placeholders like `"{{tool_descriptions}}"`: these will be used upon agent initialization to insert certain automatically generated descriptions of tools or managed agents. 
+如你所见,有一些占位符,如`"{{tool_descriptions}}"`:这些将在agent初始化时用于插入某些自动生成的工具或managed agent的描述。 -So while you can overwrite this system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter, your new system prompt must contain the following placeholders: -- `"{{tool_descriptions}}"` to insert tool descriptions. -- `"{{managed_agents_description}}"` to insert the description for managed agents if there are any. -- For `CodeAgent` only: `"{{authorized_imports}}"` to insert the list of authorized imports. +因此,虽然你可以通过将自定义提示作为参数传递给`system_prompt`参数来覆盖此系统提示模板,但你的新系统提示必须包含以下占位符: +- `"{{tool_descriptions}}"` 用于插入工具描述。 +- `"{{managed_agents_description}}"` 用于插入managed agent的描述(如果有)。 +- 仅限`CodeAgent`:`"{{authorized_imports}}"` 用于插入授权导入列表。 -Then you can change the system prompt as follows: +然后你可以按如下方式更改系统提示: ```py from smolagents.prompts import CODE_SYSTEM_PROMPT -modified_system_prompt = CODE_SYSTEM_PROMPT + "\nHere you go!" # Change the system prompt here +modified_system_prompt = CODE_SYSTEM_PROMPT + "\nHere you go!" # 在此更改系统提示 agent = CodeAgent( tools=[], @@ -254,12 +253,12 @@ agent = CodeAgent( ) ``` -This also works with the [`ToolCallingAgent`]. +这也适用于[`ToolCallingAgent`]。 -### 4. Extra planning +### 4. 额外规划 -We provide a model for a supplementary planning step, that an agent can run regularly in-between normal action steps. In this step, there is no tool call, the LLM is simply asked to update a list of facts it knows and to reflect on what steps it should take next based on those facts. 
+我们提供了一个用于补充规划步骤的模型,agent可以在正常操作步骤之间定期运行。在此步骤中,没有工具调用,LLM只是被要求更新它知道的事实列表,并根据这些事实思考它应该采取的下一步。 ```py from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool @@ -267,7 +266,7 @@ from dotenv import load_dotenv load_dotenv() -# Import tool from Hub +# 从Hub导入工具 image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True) search_tool = DuckDuckGoSearchTool() @@ -275,11 +274,11 @@ search_tool = DuckDuckGoSearchTool() agent = CodeAgent( tools=[search_tool], model=HfApiModel("Qwen/Qwen2.5-72B-Instruct"), - planning_interval=3 # This is where you activate planning! + planning_interval=3 # 这是你激活规划的地方! ) -# Run it! +# 运行它! result = agent.run( "How long would a cheetah at full speed take to run the length of Pont Alexandre III?", ) -``` +``` \ No newline at end of file From 20de4b854ff8639628fa29c0ad1ae59f89580a53 Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 15:39:46 +0800 Subject: [PATCH 05/10] Translate `tutorials/tools.md` --- docs/source/zh/_toctree.yml | 2 +- docs/source/zh/tutorials/tools.md | 113 +++++++++--------- 2 files changed, 57 insertions(+), 58 deletions(-) diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index c55e27e96..e0c3928a8 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -9,7 +9,7 @@ - local: tutorials/building_good_agents title: ✨ 构建好用的agents - local: tutorials/tools - title: 🛠️ Tools - in-depth guide + title: 🛠️ 工具 - 深度指南 - local: tutorials/secure_code_execution title: 🛡️ Secure your code execution with E2B - title: Conceptual guides diff --git a/docs/source/zh/tutorials/tools.md b/docs/source/zh/tutorials/tools.md index 014cd3b6e..e87854a95 100644 --- a/docs/source/zh/tutorials/tools.md +++ b/docs/source/zh/tutorials/tools.md @@ -13,34 +13,34 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> -# Tools +# 工具 [[open-in-colab]] -Here, we're going to see advanced tool usage. +在这里,我们将学习高级工具的使用。 > [!TIP] -> If you're new to building agents, make sure to first read the [intro to agents](../conceptual_guides/intro_agents) and the [guided tour of smolagents](../guided_tour). +> 如果你是构建agent的新手,请确保先阅读[agent介绍](../conceptual_guides/intro_agents)和[smolagents导览](../guided_tour)。 -- [Tools](#tools) - - [What is a tool, and how to build one?](#what-is-a-tool-and-how-to-build-one) - - [Share your tool to the Hub](#share-your-tool-to-the-hub) - - [Import a Space as a tool](#import-a-space-as-a-tool) - - [Use LangChain tools](#use-langchain-tools) - - [Manage your agent's toolbox](#manage-your-agents-toolbox) - - [Use a collection of tools](#use-a-collection-of-tools) +- [工具](#工具) + - [什么是工具,如何构建一个工具?](#什么是工具如何构建一个工具) + - [将你的工具分享到Hub](#将你的工具分享到hub) + - [将Space导入为工具](#将space导入为工具) + - [使用LangChain工具](#使用langchain工具) + - [管理你的agent工具箱](#管理你的agent工具箱) + - [使用工具集合](#使用工具集合) -### What is a tool, and how to build one? +### 什么是工具,如何构建一个工具? -A tool is mostly a function that an LLM can use in an agentic system. +工具主要是LLM可以在agent系统中使用的函数。 -But to use it, the LLM will need to be given an API: name, tool description, input types and descriptions, output type. +但要使用它,LLM需要被提供一个API:名称、工具描述、输入类型和描述、输出类型。 -So it cannot be only a function. It should be a class. +所以它不能仅仅是一个函数。它应该是一个类。 -So at core, the tool is a class that wraps a function with metadata that helps the LLM understand how to use it. +因此,核心上,工具是一个类,它包装了一个函数,并带有帮助LLM理解如何使用它的元数据。 -Here's how it looks: +以下是它的结构: ```python from smolagents import Tool @@ -67,39 +67,38 @@ class HFModelDownloadsTool(Tool): model_downloads_tool = HFModelDownloadsTool() ``` -The custom tool subclasses [`Tool`] to inherit useful methods. The child class also defines: -- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. 
Since the code returns the model with the most downloads for a task, let's name it `model_download_counter`. -- An attribute `description` is used to populate the agent's system prompt. -- An `inputs` attribute, which is a dictionary with keys `"type"` and `"description"`. It contains information that helps the Python interpreter make educated choices about the input. -- An `output_type` attribute, which specifies the output type. The types for both `inputs` and `output_type` should be [Pydantic formats](https://docs.pydantic.dev/latest/concepts/json_schema/#generating-json-schema), they can be either of these: [`~AUTHORIZED_TYPES`]. -- A `forward` method which contains the inference code to be executed. +自定义工具继承[`Tool`]以继承有用的方法。子类还定义了: +- 一个属性`name`,对应于工具本身的名称。名称通常描述工具的功能。由于代码返回任务中下载量最多的模型,我们将其命名为`model_download_counter`。 +- 一个属性`description`,用于填充agent的系统提示。 +- 一个`inputs`属性,它是一个带有键`"type"`和`"description"`的字典。它包含帮助Python解释器对输入做出明智选择的信息。 +- 一个`output_type`属性,指定输出类型。`inputs`和`output_type`的类型应为[Pydantic格式](https://docs.pydantic.dev/latest/concepts/json_schema/#generating-json-schema),它们可以是以下之一:[`~AUTHORIZED_TYPES`]。 +- 一个`forward`方法,包含要执行的推理代码。 -And that's all it needs to be used in an agent! +这就是它在agent中使用所需的全部内容! -There's another way to build a tool. In the [guided_tour](../guided_tour), we implemented a tool using the `@tool` decorator. The [`tool`] decorator is the recommended way to define simple tools, but sometimes you need more than this: using several methods in a class for more clarity, or using additional class attributes. +还有另一种构建工具的方法。在[guided_tour](../guided_tour)中,我们使用`@tool`装饰器实现了一个工具。[`tool`]装饰器是定义简单工具的推荐方式,但有时你需要更多:在类中使用多个方法以获得更清晰的代码,或使用额外的类属性。 -In this case, you can build your tool by subclassing [`Tool`] as described above. +在这种情况下,你可以通过如上所述继承[`Tool`]来构建你的工具。 -### Share your tool to the Hub +### 将你的工具分享到Hub -You can share your custom tool to the Hub by calling [`~Tool.push_to_hub`] on the tool. 
Make sure you've created a repository for it on the Hub and are using a token with read access. +你可以通过调用[`~Tool.push_to_hub`]将你的自定义工具分享到Hub。确保你已经在Hub上为其创建了一个仓库,并且使用的是具有读取权限的token。 ```python model_downloads_tool.push_to_hub("{your_username}/hf-model-downloads", token="<YOUR_HUGGINGFACEHUB_API_TOKEN>") ``` -For the push to Hub to work, your tool will need to respect some rules: -- All methods are self-contained, e.g. use variables that come either from their args. -- As per the above point, **all imports should be defined directly within the tool's functions**, else you will get an error when trying to call [`~Tool.save`] or [`~Tool.push_to_hub`] with your custom tool. -- If you subclass the `__init__` method, you can give it no other argument than `self`. This is because arguments set during a specific tool instance's initialization are hard to track, which prevents from sharing them properly to the hub. And anyway, the idea of making a specific class is that you can already set class attributes for anything you need to hard-code (just set `your_variable=(...)` directly under the `class YourTool(Tool):` line). And of course you can still create a class attribute anywhere in your code by assigning stuff to `self.your_variable`. +为了使推送到Hub正常工作,你的工具需要遵守一些规则: +- 所有方法都是自包含的,例如使用来自其参数中的变量。 +- 根据上述要点,**所有导入应直接在工具的函数中定义**,否则在尝试对你的自定义工具调用[`~Tool.save`]或[`~Tool.push_to_hub`]时会出现错误。 +- 如果你重写了`__init__`方法,除了`self`之外,你不能给它任何其他参数。这是因为在特定工具实例初始化期间设置的参数很难跟踪,这阻碍了将它们正确分享到Hub。无论如何,创建特定类的想法是你已经可以为任何需要硬编码的内容设置类属性(只需在`class YourTool(Tool):`行下直接设置`your_variable=(...)`)。当然,你仍然可以通过将内容分配给`self.your_variable`在代码中的任何地方创建类属性。 +一旦你的工具被推送到Hub,你就可以查看它。[这里](https://huggingface.co/spaces/m-ric/hf-model-downloads)是我推送的`model_downloads_tool`。它有一个漂亮的gradio界面。 -Once your tool is pushed to Hub, you can visualize it. [Here](https://huggingface.co/spaces/m-ric/hf-model-downloads) is the `model_downloads_tool` that I've pushed. It has a nice gradio interface. 
+在深入工具文件时,你可以发现所有工具的逻辑都在[tool.py](https://huggingface.co/spaces/m-ric/hf-model-downloads/blob/main/tool.py)下。这是你可以检查其他人分享的工具的地方。 -When diving into the tool files, you can find that all the tool's logic is under [tool.py](https://huggingface.co/spaces/m-ric/hf-model-downloads/blob/main/tool.py). That is where you can inspect a tool shared by someone else. - -Then you can load the tool with [`load_tool`] or create it with [`~Tool.from_hub`] and pass it to the `tools` parameter in your agent. -Since running tools means running custom code, you need to make sure you trust the repository, thus we require to pass `trust_remote_code=True` to load a tool from the Hub. +然后你可以使用[`load_tool`]加载工具或使用[`~Tool.from_hub`]创建它,并将其传递给agent中的`tools`参数。 +由于运行工具意味着运行自定义代码,你需要确保你信任该仓库,因此我们需要传递`trust_remote_code=True`来从Hub加载工具。 ```python from smolagents import load_tool, CodeAgent @@ -110,13 +109,13 @@ model_download_tool = load_tool( ) ``` -### Import a Space as a tool +### 将Space导入为工具 -You can directly import a Space from the Hub as a tool using the [`Tool.from_space`] method! +你可以使用[`Tool.from_space`]方法直接从Hub导入一个Space作为工具! -You only need to provide the id of the Space on the Hub, its name, and a description that will help you agent understand what the tool does. Under the hood, this will use [`gradio-client`](https://pypi.org/project/gradio-client/) library to call the Space. +你只需要提供Hub上Space的id、它的名称和一个帮助你的agent理解工具功能的描述。在底层,这将使用[`gradio-client`](https://pypi.org/project/gradio-client/)库来调用Space。 -For instance, let's import the [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev) Space from the Hub and use it to generate an image. +例如,让我们从Hub导入[FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev) Space并使用它生成一张图片。 ```python image_generation_tool = Tool.from_space( @@ -127,11 +126,11 @@ image_generation_tool = Tool.from_space( image_generation_tool("A sunny beach") ``` -And voilà, here's your image! 
🏖️ +瞧,这是你的图片!🏖️ <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/sunny_beach.webp"> -Then you can use this tool just like any other tool. For example, let's improve the prompt `a rabbit wearing a space suit` and generate an image of it. +然后你可以像使用任何其他工具一样使用这个工具。例如,让我们改进提示`A rabbit wearing a space suit`并生成它的图片。 ```python from smolagents import CodeAgent, HfApiModel @@ -156,15 +155,15 @@ final_answer(image) <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit_spacesuit_flux.webp"> -How cool is this? 🤩 +这得有多酷?🤩 -### Use LangChain tools +### 使用LangChain工具 -We love Langchain and think it has a very compelling suite of tools. -To import a tool from LangChain, use the `from_langchain()` method. +我们喜欢Langchain,并认为它有一套非常吸引人的工具。 +要从LangChain导入工具,请使用`from_langchain()`方法。 -Here is how you can use it to recreate the intro's search result using a LangChain web search tool. -This tool will need `pip install langchain google-search-results -q` to work properly. +以下是如何使用它来重现介绍中的搜索结果,使用LangChain的web搜索工具。 +这个工具需要`pip install langchain google-search-results -q`才能正常工作。 ```python from langchain.agents import load_tools @@ -175,11 +174,11 @@ agent = CodeAgent(tools=[search_tool], model=model) agent.run("How many more blocks (also denoted as layers) are in BERT base encoder compared to the encoder from the architecture proposed in Attention is All You Need?") ``` -### Manage your agent's toolbox +### 管理你的agent工具箱 -You can manage an agent's toolbox by adding or replacing a tool. +你可以通过添加或替换工具来管理agent的工具箱。 -Let's add the `model_download_tool` to an existing agent initialized with only the default toolbox. 
+让我们将`model_download_tool`添加到一个仅使用默认工具箱初始化的现有agent中。 ```python from smolagents import HfApiModel @@ -189,7 +188,7 @@ model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") agent = CodeAgent(tools=[], model=model, add_base_tools=True) agent.tools.append(model_download_tool) ``` -Now we can leverage the new tool: +现在我们可以利用新工具: ```python agent.run( @@ -199,13 +198,13 @@ agent.run( > [!TIP] -> Beware of not adding too many tools to an agent: this can overwhelm weaker LLM engines. +> 注意不要向agent添加太多工具:这可能会让较弱的LLM引擎不堪重负。 -### Use a collection of tools +### 使用工具集合 -You can leverage tool collections by using the ToolCollection object, with the slug of the collection you want to use. -Then pass them as a list to initialize your agent, and start using them! +你可以通过使用ToolCollection对象来利用工具集合,使用你想要使用的集合的slug。 +然后将它们作为列表传递给agent初始化,并开始使用它们! ```py from smolagents import ToolCollection, CodeAgent @@ -219,4 +218,4 @@ agent = CodeAgent(tools=[*image_tool_collection.tools], model=model, add_base_to agent.run("Please draw me a picture of rivers and lakes.") ``` -To speed up the start, tools are loaded only if called by the agent. 
+为了加快启动速度,工具仅在agent调用时加载。 From 477bb7c442dc20764979796670b309d87e52978b Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 15:41:13 +0800 Subject: [PATCH 06/10] Translate `tutorials/secure_code_execution.md` --- docs/source/zh/_toctree.yml | 2 +- .../zh/tutorials/secure_code_execution.md | 62 +++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index e0c3928a8..c8f16997f 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -11,7 +11,7 @@ - local: tutorials/tools title: 🛠️ 工具 - 深度指南 - local: tutorials/secure_code_execution - title: 🛡️ Secure your code execution with E2B + title: 🛡️ 使用 E2B 保护你的代码执行 - title: Conceptual guides sections: - local: conceptual_guides/intro_agents diff --git a/docs/source/zh/tutorials/secure_code_execution.md b/docs/source/zh/tutorials/secure_code_execution.md index d8a6109ae..130ccd8e4 100644 --- a/docs/source/zh/tutorials/secure_code_execution.md +++ b/docs/source/zh/tutorials/secure_code_execution.md @@ -13,59 +13,59 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -# Secure code execution +# 安全代码执行 [[open-in-colab]] > [!TIP] -> If you're new to building agents, make sure to first read the [intro to agents](../conceptual_guides/intro_agents) and the [guided tour of smolagents](../guided_tour). 
+> 如果你是第一次构建agent,请先阅读[agent介绍](../conceptual_guides/intro_agents)和[smolagents 导览](../guided_tour)。 -### Code agents +### 代码智能体 -[Multiple](https://huggingface.co/papers/2402.01030) [research](https://huggingface.co/papers/2411.01747) [papers](https://huggingface.co/papers/2401.00812) have shown that having the LLM write its actions (the tool calls) in code is much better than the current standard format for tool calling, which is across the industry different shades of "writing actions as a JSON of tools names and arguments to use". +[多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [表明](https://huggingface.co/papers/2401.00812),让大语言模型用代码编写其动作(工具调用)比当前标准的工具调用格式要好得多,目前行业标准是"将动作写成包含工具名称和参数的JSON"的各种变体。 -Why is code better? Well, because we crafted our code languages specifically to be great at expressing actions performed by a computer. If JSON snippets was a better way, this package would have been written in JSON snippets and the devil would be laughing at us. +为什么代码更好?因为我们专门为计算机执行的动作而设计编程语言。如果JSON片段是更好的方式,那么这个工具包就应该是用JSON片段编写的,魔鬼就会嘲笑我们。 -Code is just a better way to express actions on a computer. It has better: -- **Composability:** could you nest JSON actions within each other, or define a set of JSON actions to re-use later, the same way you could just define a python function? -- **Object management:** how do you store the output of an action like `generate_image` in JSON? -- **Generality:** code is built to express simply anything you can do have a computer do. -- **Representation in LLM training corpuses:** why not leverage this benediction of the sky that plenty of quality actions have already been included in LLM training corpuses? +代码就是表达计算机动作的更好方式。它具有更好的: +- **组合性**:你能像定义Python函数那样,在JSON动作中嵌套其他JSON动作,或者定义一组JSON动作以便以后重用吗? +- **对象管理**:你如何在JSON中存储像`generate_image`这样的动作的输出? +- **通用性**:代码是为了简单地表达任何可以让计算机做的事情而构建的。 +- **在LLM训练语料库中的表示**:天赐良机,为什么不利用已经包含在LLM训练语料库中的大量高质量动作呢? 
-This is illustrated on the figure below, taken from [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030). +下图展示了这一点,取自[可执行代码动作引出更好的LLM智能体](https://huggingface.co/papers/2402.01030)。 <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/code_vs_json_actions.png"> -This is why we put emphasis on proposing code agents, in this case python agents, which meant putting higher effort on building secure python interpreters. +这就是为什么我们强调提出代码智能体,在本例中是Python智能体,这意味着我们要在构建安全的Python解释器上投入更多精力。 -### Local python interpreter +### 本地Python解释器 -By default, the `CodeAgent` runs LLM-generated code in your environment. -This execution is not done by the vanilla Python interpreter: we've re-built a more secure `LocalPythonInterpreter` from the ground up. -This interpreter is designed for security by: - - Restricting the imports to a list explicitly passed by the user - - Capping the number of operations to prevent infinite loops and resource bloating. - - Will not perform any operation that's not pre-defined. +默认情况下,`CodeAgent`会在你的环境中运行LLM生成的代码。 +这个执行不是由普通的Python解释器完成的:我们从零开始重新构建了一个更安全的`LocalPythonInterpreter`。 +这个解释器通过以下方式设计以确保安全: + - 将导入限制为用户显式传递的列表 + - 限制操作次数以防止无限循环和资源膨胀 + - 不会执行任何未预定义的操作 -We've used this on many use cases, without ever observing any damage to the environment. +我们已经在许多用例中使用了这个解释器,从未观察到对环境造成任何损害。 -However this solution is not watertight: one could imagine occasions where LLMs fine-tuned for malignant actions could still hurt your environment. For instance if you've allowed an innocuous package like `Pillow` to process images, the LLM could generate thousands of saves of images to bloat your hard drive. -It's certainly not likely if you've chosen the LLM engine yourself, but it could happen. 
+然而,这个解决方案并不是万无一失的:可以想象,如果LLM被微调用于恶意操作,仍然可能损害你的环境。例如,如果你允许像`Pillow`这样无害的包处理图像,LLM可能会生成数千张图像保存以膨胀你的硬盘。 +如果你自己选择了LLM引擎,这当然不太可能,但它可能会发生。 -So if you want to be extra cautious, you can use the remote code execution option described below. +所以如果你想格外谨慎,可以使用下面描述的远程代码执行选项。 -### E2B code executor +### E2B代码执行器 -For maximum security, you can use our integration with E2B to run code in a sandboxed environment. This is a remote execution service that runs your code in an isolated container, making it impossible for the code to affect your local environment. +为了最大程度的安全性,你可以使用我们与E2B的集成在沙盒环境中运行代码。这是一个远程执行服务,可以在隔离的容器中运行你的代码,使代码无法影响你的本地环境。 -For this, you will need to setup your E2B account and set your `E2B_API_KEY` in your environment variables. Head to [E2B's quickstart documentation](https://e2b.dev/docs/quickstart) for more information. +为此,你需要设置你的E2B账户并在环境变量中设置`E2B_API_KEY`。请前往[E2B快速入门文档](https://e2b.dev/docs/quickstart)了解更多信息。 -Then you can install it with `pip install e2b-code-interpreter python-dotenv`. +然后你可以通过`pip install e2b-code-interpreter python-dotenv`安装它。 -Now you're set! +现在你已经准备好了! -To set the code executor to E2B, simply pass the flag `use_e2b_executor=True` when initializing your `CodeAgent`. -Note that you should add all the tool's dependencies in `additional_authorized_imports`, so that the executor installs them. +要将代码执行器设置为E2B,只需在初始化`CodeAgent`时传递标志`use_e2b_executor=True`。 +请注意,你应该将所有工具的依赖项添加到`additional_authorized_imports`中,以便执行器安装它们。 ```py from smolagents import CodeAgent, VisitWebpageTool, HfApiModel @@ -79,4 +79,4 @@ agent = CodeAgent( agent.run("What was Abraham Lincoln's preferred pet?") ``` -E2B code execution is not compatible with multi-agents at the moment - because having an agent call in a code blob that should be executed remotely is a mess. But we're working on adding it! +目前E2B代码执行暂不兼容多agent——因为把agent调用放在应该在远程执行的代码块里,是非常混乱的。但我们正在努力做到这件事! 
From 5a1c27902ee2637148e9e78e6e01c01af8a796cf Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 15:49:04 +0800 Subject: [PATCH 07/10] Translate `conceptual_guides/intro_agents.md` --- docs/source/zh/_toctree.yml | 2 +- .../zh/conceptual_guides/intro_agents.md | 114 +++++++++--------- 2 files changed, 60 insertions(+), 56 deletions(-) diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index c8f16997f..32c819e23 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -15,7 +15,7 @@ - title: Conceptual guides sections: - local: conceptual_guides/intro_agents - title: 🤖 An introduction to agentic systems + title: 🤖 Agent化系统介绍 - local: conceptual_guides/react title: 🤔 How do Multi-step agents work? - title: Examples diff --git a/docs/source/zh/conceptual_guides/intro_agents.md b/docs/source/zh/conceptual_guides/intro_agents.md index c233b39bb..19034356f 100644 --- a/docs/source/zh/conceptual_guides/intro_agents.md +++ b/docs/source/zh/conceptual_guides/intro_agents.md @@ -13,106 +13,110 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -# Introduction to Agents -## 🤔 What are agents? +# Agent简介 -Any efficient system using AI will need to provide LLMs some kind of access to the real world: for instance the possibility to call a search tool to get external information, or to act on certain programs in order to solve a task. In other words, LLMs should have ***agency***. Agentic programs are the gateway to the outside world for LLMs. +> [!TIP] +> 译者注:Agent的业内术语是“智能体”。本译文将保留agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) + +## 🤔 什么是agent? + +任何使用 AI 的高效系统都需要为 LLM 提供某种访问现实世界的方式:例如调用搜索工具获取外部信息,或者操作某些程序以完成任务。换句话说,LLM 应该具有**_Agent能力_**。Agent程序是 LLM 通往外部世界的门户。 > [!TIP] -> AI Agents are **programs where LLM outputs control the workflow**. 
+> AI agent是**LLM 输出控制工作流的程序**。 -Any system leveraging LLMs will integrate the LLM outputs into code. The influence of the LLM's input on the code workflow is the level of agency of LLMs in the system. +任何利用 LLM 的系统都会将 LLM 输出集成到代码中。LLM 输出对代码工作流的影响程度就是 LLM 在系统中的agent能力级别。 -Note that with this definition, "agent" is not a discrete, 0 or 1 definition: instead, "agency" evolves on a continuous spectrum, as you give more or less power to the LLM on your workflow. +请注意,根据这个定义,"Agent"不是一个离散的、非 0 即 1 的定义:相反,"Agent能力"是一个连续谱系,随着你在工作流中给予 LLM 更多或更少的权力而变化。 -See in the table below how agency can vary across systems: +请参见下表中agent能力在不同系统中的变化: -| Agency Level | Description | How that's called | Example Pattern | -| ------------ | ------------------------------------------------------- | ----------------- | -------------------------------------------------- | -| ☆☆☆ | LLM output has no impact on program flow | Simple Processor | `process_llm_output(llm_response)` | -| ★☆☆ | LLM output determines an if/else switch | Router | `if llm_decision(): path_a() else: path_b()` | -| ★★☆ | LLM output determines function execution | Tool Caller | `run_function(llm_chosen_tool, llm_chosen_args)` | -| ★★★ | LLM output controls iteration and program continuation | Multi-step Agent | `while llm_should_continue(): execute_next_step()` | -| ★★★ | One agentic workflow can start another agentic workflow | Multi-Agent | `if llm_trigger(): execute_agent()` | +| Agent能力级别 | 描述 | 名称 | 示例模式 | +| ------------ | ---------------------------------------------- | ---------- | -------------------------------------------------- | +| ☆☆☆ | LLM 输出对程序流程没有影响 | 简单处理器 | `process_llm_output(llm_response)` | +| ★☆☆ | LLM 输出决定 if/else 分支 | 路由 | `if llm_decision(): path_a() else: path_b()` | +| ★★☆ | LLM 输出决定函数执行 | 工具调用者 | `run_function(llm_chosen_tool, llm_chosen_args)` | +| ★★★ | LLM 输出控制迭代和程序继续 | 多步Agent | `while llm_should_continue(): execute_next_step()` | +| ★★★ | 一个agent工作流可以启动另一个agent工作流 | 多Agent | `if
llm_trigger(): execute_agent()` | -The multi-step agent has this code structure: +多步agent具有以下代码结构: ```python memory = [user_defined_task] -while llm_should_continue(memory): # this loop is the multi-step part - action = llm_get_next_action(memory) # this is the tool-calling part +while llm_should_continue(memory): # 这个循环是多步部分 + action = llm_get_next_action(memory) # 这是工具调用部分 observations = execute_action(action) memory += [action, observations] ``` -This agentic system runs in a loop, executing a new action at each step (the action can involve calling some pre-determined *tools* that are just functions), until its observations make it apparent that a satisfactory state has been reached to solve the given task. Here’s an example of how a multi-step agent can solve a simple math question: +这个agent系统在一个循环中运行,每一步执行一个新动作(该动作可能涉及调用一些预定义的*工具*,这些工具只是函数),直到其观察结果表明已达到解决给定任务的满意状态。以下是一个多步agent如何解决简单数学问题的示例: <div class="flex justify-center"> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/Agent_ManimCE.gif"/> </div> +## ✅ 何时使用agent / ⛔ 何时避免使用 -## ✅ When to use agents / ⛔ when to avoid them +当你需要 LLM 确定应用程序的工作流时,agent很有用。但它们通常有些过度。问题是:我真的需要工作流的灵活性来有效解决手头的任务吗? +如果预定义的工作流经常不足,这意味着你需要更多的灵活性。 +让我们举个例子:假设你正在开发一个处理冲浪旅行网站客户请求的应用程序。 -Agents are useful when you need an LLM to determine the workflow of an app. But they’re often overkill. The question is: do I really need flexibility in the workflow to efficiently solve the task at hand? -If the pre-determined workflow falls short too often, that means you need more flexibility. -Let's take an example: say you're making an app that handles customer requests on a surfing trip website. +你可以提前知道请求将属于 2 个类别之一(基于用户选择),并且你为这 2 种情况都有预定义的工作流。 -You could know in advance that the requests will can belong to either of 2 buckets (based on user choice), and you have a predefined workflow for each of these 2 cases. +1. 想要了解旅行信息?⇒ 给他们访问搜索栏以搜索你的知识库 +2. 想与销售交谈?⇒ 让他们填写联系表单。 -1. 
Want some knowledge on the trips? ⇒ give them access to a search bar to search your knowledge base -2. Wants to talk to sales? ⇒ let them type in a contact form. +如果这个确定性工作流适合所有查询,那就直接编码吧!这将为你提供一个 100% 可靠的系统,没有让不可预测的 LLM 干扰你的工作流而引入错误的风险。为了简单和稳健起见,建议规范化不使用任何agent行为。 -If that deterministic workflow fits all queries, by all means just code everything! This will give you a 100% reliable system with no risk of error introduced by letting unpredictable LLMs meddle in your workflow. For the sake of simplicity and robustness, it's advised to regularize towards not using any agentic behaviour. +但如果工作流不能提前确定得那么好呢? -But what if the workflow can't be determined that well in advance? +例如,用户想问:`"I can come on Monday, but I forgot my passport so risk being delayed to Wednesday, is it possible to take me and my stuff to surf on Tuesday morning, with a cancellation insurance?"`这个问题涉及许多因素,可能上述预定的标准都不足以满足这个请求。 -For instance, a user wants to ask : `"I can come on Monday, but I forgot my passport so risk being delayed to Wednesday, is it possible to take me and my stuff to surf on Tuesday morning, with a cancellation insurance?"` This question hinges on many factors, and probably none of the predetermined criteria above will suffice for this request. +如果预定义的工作流经常不足,这意味着你需要更多的灵活性。 -If the pre-determined workflow falls short too often, that means you need more flexibility. +这就是agent设置发挥作用的地方。 -That is where an agentic setup helps. +在上面的例子中,你可以创建一个多步agent,它可以访问天气 API 获取天气预报,Google Maps API 计算旅行距离,员工在线仪表板和你的知识库上的 RAG 系统。 -In the above example, you could just make a multi-step agent that has access to a weather API for weather forecasts, Google Maps API to compute travel distance, an employee availability dashboard and a RAG system on your knowledge base. +直到最近,计算机程序还局限于预定义的工作流,试图通过堆积 if/else 分支来处理复杂性。它们专注于极其狭窄的任务,如"计算这些数字的总和"或"找到这个图中的最短路径"。但实际上,大多数现实生活中的任务,如我们上面的旅行示例,都不适合预定义的工作流。agent系统为程序打开了现实世界任务的大门! 
-Until recently, computer programs were restricted to pre-determined workflows, trying to handle complexity by piling up if/else switches. They focused on extremely narrow tasks, like "compute the sum of these numbers" or "find the shortest path in this graph". But actually, most real-life tasks, like our trip example above, do not fit in pre-determined workflows. Agentic systems open up the vast world of real-world tasks to programs! +## 为什么选择`smolagents`? -## Why `smolagents`? +对于一些低级的agent用例,如链或路由器,你可以自己编写所有代码。这样会更好,因为它可以让你更好地控制和理解你的系统。 -For some low-level agentic use cases, like chains or routers, you can write all the code yourself. You'll be much better that way, since it will let you control and understand your system better. +但一旦你开始追求更复杂的行为,比如让 LLM 调用函数(即"工具调用")或让 LLM 运行 while 循环("多步agent"),一些抽象就变得必要: -But once you start going for more complicated behaviours like letting an LLM call a function (that's "tool calling") or letting an LLM run a while loop ("multi-step agent"), some abstractions become necessary: -- for tool calling, you need to parse the agent's output, so this output needs a predefined format like "Thought: I should call tool 'get_weather'. Action: get_weather(Paris).", that you parse with a predefined function, and system prompt given to the LLM should notify it about this format. -- for a multi-step agent where the LLM output determines the loop, you need to give a different prompt to the LLM based on what happened in the last loop iteration: so you need some kind of memory. +- 对于工具调用,你需要解析agent的输出,因此这个输出需要一个预定义的格式,如"Thought: I should call tool 'get_weather'. Action: get_weather(Paris).",你用预定义的函数解析它,并且给 LLM 的系统提示应该通知它这个格式。 +- 对于 LLM 输出决定循环的多步agent,你需要根据上次循环迭代中发生的情况给 LLM 不同的提示:所以你需要某种记忆能力。 -See? 
With these two examples, we already found the need for a few items to help us: +看到了吗?通过这两个例子,我们已经发现需要一些项目来帮助我们: -- Of course, an LLM that acts as the engine powering the system -- A list of tools that the agent can access -- A parser that extracts tool calls from the LLM output -- A system prompt synced with the parser -- A memory +- 当然,一个作为系统引擎的 LLM +- agent可以访问的工具列表 +- 从 LLM 输出中提取工具调用的解析器 +- 与解析器同步的系统提示 +- 记忆能力 -But wait, since we give room to LLMs in decisions, surely they will make mistakes: so we need error logging and retry mechanisms. +但是等等,既然我们给 LLM 在决策中留出了空间,它们肯定会犯错误:所以我们需要错误日志记录和重试机制。 -All these elements need tight coupling to make a well-functioning system. That's why we decided we needed to make basic building blocks to make all this stuff work together. +所有这些元素都需要紧密耦合才能形成一个功能良好的系统。这就是为什么我们决定需要制作基本构建块来让所有这些东西协同工作。 -## Code agents +## 代码agent -In a multi-step agent, at each step, the LLM can write an action, in the form of some calls to external tools. A common format (used by Anthropic, OpenAI, and many others) for writing these actions is generally different shades of "writing actions as a JSON of tools names and arguments to use, which you then parse to know which tool to execute and with which arguments". +在多步agent中,每一步 LLM 都可以编写一个动作,形式为调用外部工具。编写这些动作的常见格式(由 Anthropic、OpenAI 等使用)通常是"将动作编写为工具名称和要使用的参数的 JSON,然后解析以知道要执行哪个工具以及使用哪些参数"的不同变体。 -[Multiple](https://huggingface.co/papers/2402.01030) [research](https://huggingface.co/papers/2411.01747) [papers](https://huggingface.co/papers/2401.00812) have shown that having the tool calling LLMs in code is much better. +[多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [论文](https://huggingface.co/papers/2401.00812)表明,在代码中进行工具调用的 LLM 要好得多。 -The reason for this simply that *we crafted our code languages specifically to be the best possible way to express actions performed by a computer*. 
If JSON snippets were a better expression, JSON would be the top programming language and programming would be hell on earth. +原因很简单,_我们专门设计了我们的代码语言,使其成为表达计算机执行动作的最佳方式_。如果 JSON 片段是更好的表达方式,JSON 将成为顶级编程语言,编程将变得非常困难。 -The figure below, taken from [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030), illustrate some advantages of writing actions in code: +下图取自[Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030),说明了用代码编写动作的一些优势: <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/code_vs_json_actions.png"> -Writing actions in code rather than JSON-like snippets provides better: +与 JSON 片段相比,用代码编写动作提供了更好的: -- **Composability:** could you nest JSON actions within each other, or define a set of JSON actions to re-use later, the same way you could just define a python function? -- **Object management:** how do you store the output of an action like `generate_image` in JSON? -- **Generality:** code is built to express simply anything you can have a computer do. -- **Representation in LLM training data:** plenty of quality code actions is already included in LLMs’ training data which means they’re already trained for this! +- **可组合性:** 你能像定义 python 函数一样,将 JSON 动作嵌套在一起,或定义一组 JSON 动作以供重用吗? +- **对象管理:** 你如何在 JSON 中存储像`generate_image`这样的动作的输出? +- **通用性:** 代码被构建为简单地表达任何你可以让计算机做的事情。 +- **LLM 训练数据中的表示:** 大量高质量的代码动作已经包含在 LLM 的训练数据中,这意味着它们已经为此进行了训练! 
From 761818d99a358336d4e114cf8838fa62d4633905 Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 15:52:00 +0800 Subject: [PATCH 08/10] Translate `conceptual_guides/react.md` --- docs/source/zh/_toctree.yml | 2 +- docs/source/zh/conceptual_guides/react.md | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index 32c819e23..ce8251f7e 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -17,7 +17,7 @@ - local: conceptual_guides/intro_agents title: 🤖 Agent化系统介绍 - local: conceptual_guides/react - title: 🤔 How do Multi-step agents work? + title: 🤔 多步骤Agent是如何工作的? - title: Examples sections: - local: examples/text_to_sql diff --git a/docs/source/zh/conceptual_guides/react.md b/docs/source/zh/conceptual_guides/react.md index d85c9cad3..e90f8bd9b 100644 --- a/docs/source/zh/conceptual_guides/react.md +++ b/docs/source/zh/conceptual_guides/react.md @@ -13,18 +13,18 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -# How do multi-step agents work? +# 多步骤agent是如何工作的? -The ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) is currently the main approach to building agents. +ReAct框架([Yao et al., 2022](https://huggingface.co/papers/2210.03629))是目前构建agent的主要方法。 -The name is based on the concatenation of two words, "Reason" and "Act." Indeed, agents following this architecture will solve their task in as many steps as needed, each step consisting of a Reasoning step, then an Action step where it formulates tool calls that will bring it closer to solving the task at hand. +该名称基于两个词的组合:"Reason"(推理)和"Act"(行动)。实际上,遵循此架构的agent将根据需要尽可能多的步骤来解决其任务,每个步骤包括一个推理步骤,然后是一个行动步骤,在该步骤中,它制定工具调用,使其更接近解决手头的任务。 -React process involves keeping a memory of past steps. 
+ReAct过程涉及保留过去步骤的记忆。 > [!TIP] -> Read [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about multi-step agents. +> 阅读[Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents)博客文章以了解更多关于多步agent的信息。 -Here is a video overview of how that works: +以下是其工作原理的视频概述: <div class="flex justify-center"> <img @@ -37,11 +37,11 @@ Here is a video overview of how that works: /> </div> - + -We implement two versions of ToolCallingAgent: -- [`ToolCallingAgent`] generates tool calls as a JSON in its output. -- [`CodeAgent`] is a new type of ToolCallingAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance. +我们实现了两个版本的ToolCallingAgent: +- [`ToolCallingAgent`]在其输出中生成JSON格式的工具调用。 +- [`CodeAgent`]是一种新型的ToolCallingAgent,它生成代码块形式的工具调用,这对于具有强大编码性能的LLM非常有效。 > [!TIP] -> We also provide an option to run agents in one-shot: just pass `single_step=True` when launching the agent, like `agent.run(your_task, single_step=True)` \ No newline at end of file +> 我们还提供了一个选项来以单步模式运行agent:只需在启动agent时传递`single_step=True`,例如`agent.run(your_task, single_step=True)` \ No newline at end of file From 068fc887b438e6a0164d27c83b31a9576df7fc4a Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 16:31:57 +0800 Subject: [PATCH 09/10] Add community guide video to `guided_tour.md` --- docs/source/zh/guided_tour.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/zh/guided_tour.md b/docs/source/zh/guided_tour.md index d16786596..ead192e37 100644 --- a/docs/source/zh/guided_tour.md +++ b/docs/source/zh/guided_tour.md @@ -22,6 +22,9 @@ rendered properly in your Markdown viewer. > [!TIP] > 译者注:Agent的业内术语是“智能体”。本译文将保留agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) 
+> [!TIP] +> 中文社区发布了关于smolagents的介绍和实践讲解视频(来源:[Issue#80](https://github.com/huggingface/smolagents/issues/80)),你可以访问[这里](https://www.youtube.com/watch?v=wwN3oAugc4c)进行观看! + ### 构建您的agent 要初始化一个最小化的agent,您至少需要以下两个参数: From 89e849ca728d6dd50e2d0652ffb692504d7d1ab1 Mon Sep 17 00:00:00 2001 From: Ayuilos <ekklovepeace@gmail.com> Date: Sat, 11 Jan 2025 19:39:12 +0800 Subject: [PATCH 10/10] Add `space` between CN char and EN char for better reading experience --- docs/source/zh/_toctree.yml | 6 +- .../zh/conceptual_guides/intro_agents.md | 62 ++++---- docs/source/zh/conceptual_guides/react.md | 20 +-- docs/source/zh/guided_tour.md | 142 +++++++++--------- docs/source/zh/index.md | 16 +- .../zh/tutorials/building_good_agents.md | 88 +++++------ .../zh/tutorials/secure_code_execution.md | 40 ++--- docs/source/zh/tutorials/tools.md | 86 +++++------ 8 files changed, 230 insertions(+), 230 deletions(-) diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index ce8251f7e..4da8f4859 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -7,7 +7,7 @@ - title: Tutorials sections: - local: tutorials/building_good_agents - title: ✨ 构建好用的agents + title: ✨ 构建好用的 agents - local: tutorials/tools title: 🛠️ 工具 - 深度指南 - local: tutorials/secure_code_execution @@ -15,9 +15,9 @@ - title: Conceptual guides sections: - local: conceptual_guides/intro_agents - title: 🤖 Agent化系统介绍 + title: 🤖 Agent 化系统介绍 - local: conceptual_guides/react - title: 🤔 多步骤Agent是如何工作的? + title: 🤔 多步骤 Agent 是如何工作的? - title: Examples sections: - local: examples/text_to_sql diff --git a/docs/source/zh/conceptual_guides/intro_agents.md b/docs/source/zh/conceptual_guides/intro_agents.md index 19034356f..416aabcb5 100644 --- a/docs/source/zh/conceptual_guides/intro_agents.md +++ b/docs/source/zh/conceptual_guides/intro_agents.md @@ -14,33 +14,33 @@ rendered properly in your Markdown viewer. 
--> -# Agent简介 +# Agent 简介 > [!TIP] -> 译者注:Agent的业内术语是“智能体”。本译文将保留agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) +> 译者注:Agent 的业内术语是“智能体”。本译文将保留 agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) -## 🤔 什么是agent? +## 🤔 什么是 agent? -任何使用 AI 的高效系统都需要为 LLM 提供某种访问现实世界的方式:例如调用搜索工具获取外部信息,或者操作某些程序以完成任务。换句话说,LLM 应该具有**_Agent能力_**。Agent程序是 LLM 通往外部世界的门户。 +任何使用 AI 的高效系统都需要为 LLM 提供某种访问现实世界的方式:例如调用搜索工具获取外部信息,或者操作某些程序以完成任务。换句话说,LLM 应该具有 **_Agent 能力_**。Agent 程序是 LLM 通往外部世界的门户。 > [!TIP] -> AI agent是**LLM 输出控制工作流的程序**。 +> AI agent 是 **LLM 输出控制工作流的程序**。 -任何利用 LLM 的系统都会将 LLM 输出集成到代码中。LLM 输入对代码工作流的影响程度就是 LLM 在系统中的agent能力级别。 +任何利用 LLM 的系统都会将 LLM 输出集成到代码中。LLM 输入对代码工作流的影响程度就是 LLM 在系统中的 agent 能力级别。 -请注意,根据这个定义,"Agent"不是一个离散的、非 0 即 1 的定义:相反,"Agent能力"是一个连续谱系,随着你在工作流中给予 LLM 更多或更少的权力而变化。 +请注意,根据这个定义,"Agent" 不是一个离散的、非 0 即 1 的定义:相反,"Agent 能力" 是一个连续谱系,随着你在工作流中给予 LLM 更多或更少的权力而变化。 -请参见下表中agent能力在不同系统中的变化: +请参见下表中 agent 能力在不同系统中的变化: -| Agent能力级别 | 描述 | 名称 | 示例模式 | +| Agent 能力级别 | 描述 | 名称 | 示例模式 | | ------------ | ---------------------------------------------- | ---------- | -------------------------------------------------- | | ☆☆☆ | LLM 输出对程序流程没有影响 | 简单处理器 | `process_llm_output(llm_response)` | | ★☆☆ | LLM 输出决定 if/else 分支 | 路由 | `if llm_decision(): path_a() else: path_b()` | | ★★☆ | LLM 输出决定函数执行 | 工具调用者 | `run_function(llm_chosen_tool, llm_chosen_args)` | -| ★★★ | LLM 输出控制迭代和程序继续 | 多步Agent | `while llm_should_continue(): execute_next_step()` | -| ★★★ | 一个agent工作流可以启动另一个agent工作流 | 多Agent | `if llm_trigger(): execute_agent()` | +| ★★★ | LLM 输出控制迭代和程序继续 | 多步 Agent | `while llm_should_continue(): execute_next_step()` | +| ★★★ | 一个 agent 工作流可以启动另一个 agent 工作流 | 多 Agent | `if llm_trigger(): execute_agent()` | -多步agent具有以下代码结构: +多步 agent 具有以下代码结构: ```python memory = [user_defined_task] @@ -50,15 +50,15 @@ while llm_should_continue(memory): # 这个循环是多步部分 memory += [action, observations] ``` 
-这个agent系统在一个循环中运行,每一步执行一个新动作(该动作可能涉及调用一些预定义的*工具*,这些工具只是函数),直到其观察结果表明已达到解决给定任务的满意状态。以下是一个多步agent如何解决简单数学问题的示例: +这个 agent 系统在一个循环中运行,每一步执行一个新动作(该动作可能涉及调用一些预定义的 *工具*,这些工具只是函数),直到其观察结果表明已达到解决给定任务的满意状态。以下是一个多步 agent 如何解决简单数学问题的示例: <div class="flex justify-center"> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/Agent_ManimCE.gif"/> </div> -## ✅ 何时使用agent / ⛔ 何时避免使用 +## ✅ 何时使用 agent / ⛔ 何时避免使用 -当你需要 LLM 确定应用程序的工作流时,agent很有用。但它们通常有些过度。问题是:我真的需要工作流的灵活性来有效解决手头的任务吗? +当你需要 LLM 确定应用程序的工作流时,agent 很有用。但它们通常有些过度。问题是:我真的需要工作流的灵活性来有效解决手头的任务吗? 如果预定义的工作流经常不足,这意味着你需要更多的灵活性。 让我们举个例子:假设你正在开发一个处理冲浪旅行网站客户请求的应用程序。 @@ -67,33 +67,33 @@ while llm_should_continue(memory): # 这个循环是多步部分 1. 想要了解旅行信息?⇒ 给他们访问搜索栏以搜索你的知识库 2. 想与销售交谈?⇒ 让他们填写联系表单。 -如果这个确定性工作流适合所有查询,那就直接编码吧!这将为你提供一个 100% 可靠的系统,没有让不可预测的 LLM 干扰你的工作流而引入错误的风险。为了简单和稳健起见,建议规范化不使用任何agent行为。 +如果这个确定性工作流适合所有查询,那就直接编码吧!这将为你提供一个 100% 可靠的系统,没有让不可预测的 LLM 干扰你的工作流而引入错误的风险。为了简单和稳健起见,建议规范化不使用任何 agent 行为。 但如果工作流不能提前确定得那么好呢? -例如,用户想问:`"I can come on Monday, but I forgot my passport so risk being delayed to Wednesday, is it possible to take me and my stuff to surf on Tuesday morning, with a cancellation insurance?"`这个问题涉及许多因素,可能上述预定的标准都不足以满足这个请求。 +例如,用户想问:`"I can come on Monday, but I forgot my passport so risk being delayed to Wednesday, is it possible to take me and my stuff to surf on Tuesday morning, with a cancellation insurance?"` 这个问题涉及许多因素,可能上述预定的标准都不足以满足这个请求。 如果预定义的工作流经常不足,这意味着你需要更多的灵活性。 -这就是agent设置发挥作用的地方。 +这就是 agent 设置发挥作用的地方。 -在上面的例子中,你可以创建一个多步agent,它可以访问天气 API 获取天气预报,Google Maps API 计算旅行距离,员工在线仪表板和你的知识库上的 RAG 系统。 +在上面的例子中,你可以创建一个多步 agent,它可以访问天气 API 获取天气预报,Google Maps API 计算旅行距离,员工空闲情况仪表板和你的知识库上的 RAG 系统。 -直到最近,计算机程序还局限于预定义的工作流,试图通过堆积 if/else 分支来处理复杂性。它们专注于极其狭窄的任务,如"计算这些数字的总和"或"找到这个图中的最短路径"。但实际上,大多数现实生活中的任务,如我们上面的旅行示例,都不适合预定义的工作流。agent系统为程序打开了现实世界任务的大门!
+直到最近,计算机程序还局限于预定义的工作流,试图通过堆积 if/else 分支来处理复杂性。它们专注于极其狭窄的任务,如"计算这些数字的总和"或"找到这个图中的最短路径"。但实际上,大多数现实生活中的任务,如我们上面的旅行示例,都不适合预定义的工作流。agent 系统为程序打开了现实世界任务的大门! -## 为什么选择`smolagents`? +## 为什么选择 `smolagents`? -对于一些低级的agent用例,如链或路由器,你可以自己编写所有代码。这样会更好,因为它可以让你更好地控制和理解你的系统。 +对于一些低级的 agent 用例,如链或路由器,你可以自己编写所有代码。这样会更好,因为它可以让你更好地控制和理解你的系统。 -但一旦你开始追求更复杂的行为,比如让 LLM 调用函数(即"工具调用")或让 LLM 运行 while 循环("多步agent"),一些抽象就变得必要: +但一旦你开始追求更复杂的行为,比如让 LLM 调用函数(即"工具调用")或让 LLM 运行 while 循环("多步 agent"),一些抽象就变得必要: -- 对于工具调用,你需要解析agent的输出,因此这个输出需要一个预定义的格式,如"Thought: I should call tool 'get_weather'. Action: get_weather(Paris).",你用预定义的函数解析它,并且给 LLM 的系统提示应该通知它这个格式。 -- 对于 LLM 输出决定循环的多步agent,你需要根据上次循环迭代中发生的情况给 LLM 不同的提示:所以你需要某种记忆能力。 +- 对于工具调用,你需要解析 agent 的输出,因此这个输出需要一个预定义的格式,如"Thought: I should call tool 'get_weather'. Action: get_weather(Paris).",你用预定义的函数解析它,并且给 LLM 的系统提示应该通知它这个格式。 +- 对于 LLM 输出决定循环的多步 agent,你需要根据上次循环迭代中发生的情况给 LLM 不同的提示:所以你需要某种记忆能力。 看到了吗?通过这两个例子,我们已经发现需要一些项目来帮助我们: - 当然,一个作为系统引擎的 LLM -- agent可以访问的工具列表 +- agent 可以访问的工具列表 - 从 LLM 输出中提取工具调用的解析器 - 与解析器同步的系统提示 - 记忆能力 @@ -102,21 +102,21 @@ while llm_should_continue(memory): # 这个循环是多步部分 所有这些元素都需要紧密耦合才能形成一个功能良好的系统。这就是为什么我们决定需要制作基本构建块来让所有这些东西协同工作。 -## 代码agent +## 代码 agent -在多步agent中,每一步 LLM 都可以编写一个动作,形式为调用外部工具。编写这些动作的常见格式(由 Anthropic、OpenAI 等使用)通常是"将动作编写为工具名称和要使用的参数的 JSON,然后解析以知道要执行哪个工具以及使用哪些参数"的不同变体。 +在多步 agent 中,每一步 LLM 都可以编写一个动作,形式为调用外部工具。编写这些动作的常见格式(由 Anthropic、OpenAI 等使用)通常是"将动作编写为工具名称和要使用的参数的 JSON,然后解析以知道要执行哪个工具以及使用哪些参数"的不同变体。 -[多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [论文](https://huggingface.co/papers/2401.00812)表明,在代码中进行工具调用的 LLM 要好得多。 +[多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [论文](https://huggingface.co/papers/2401.00812) 表明,在代码中进行工具调用的 LLM 要好得多。 原因很简单,_我们专门设计了我们的代码语言,使其成为表达计算机执行动作的最佳方式_。如果 JSON 片段是更好的表达方式,JSON 将成为顶级编程语言,编程将变得非常困难。 -下图取自[Executable Code Actions Elicit Better LLM 
Agents](https://huggingface.co/papers/2402.01030),说明了用代码编写动作的一些优势: +下图取自 [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030),说明了用代码编写动作的一些优势: <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/code_vs_json_actions.png"> 与 JSON 片段相比,用代码编写动作提供了更好的: - **可组合性:** 你能像定义 python 函数一样,将 JSON 动作嵌套在一起,或定义一组 JSON 动作以供重用吗? -- **对象管理:** 你如何在 JSON 中存储像`generate_image`这样的动作的输出? +- **对象管理:** 你如何在 JSON 中存储像 `generate_image` 这样的动作的输出? - **通用性:** 代码被构建为简单地表达任何你可以让计算机做的事情。 - **LLM 训练数据中的表示:** 大量高质量的代码动作已经包含在 LLM 的训练数据中,这意味着它们已经为此进行了训练! diff --git a/docs/source/zh/conceptual_guides/react.md b/docs/source/zh/conceptual_guides/react.md index e90f8bd9b..24428e03f 100644 --- a/docs/source/zh/conceptual_guides/react.md +++ b/docs/source/zh/conceptual_guides/react.md @@ -13,16 +13,16 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -# 多步骤agent是如何工作的? +# 多步骤 agent 是如何工作的? 
-ReAct框架([Yao et al., 2022](https://huggingface.co/papers/2210.03629))是目前构建agent的主要方法。 +ReAct 框架([Yao et al., 2022](https://huggingface.co/papers/2210.03629))是目前构建 agent 的主要方法。 -该名称基于两个词的组合:"Reason"(推理)和"Act"(行动)。实际上,遵循此架构的agent将根据需要尽可能多的步骤来解决其任务,每个步骤包括一个推理步骤,然后是一个行动步骤,在该步骤中,它制定工具调用,使其更接近解决手头的任务。 +该名称基于两个词的组合:"Reason" (推理)和 "Act" (行动)。实际上,遵循此架构的 agent 将根据需要尽可能多的步骤来解决其任务,每个步骤包括一个推理步骤,然后是一个行动步骤,在该步骤中,它制定工具调用,使其更接近解决手头的任务。 -ReAct过程涉及保留过去步骤的记忆。 +ReAct 过程涉及保留过去步骤的记忆。 > [!TIP] -> 阅读[Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents)博客文章以了解更多关于多步agent的信息。 +> 阅读 [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) 博客文章以了解更多关于多步 agent 的信息。 以下是其工作原理的视频概述: @@ -37,11 +37,11 @@ ReAct过程涉及保留过去步骤的记忆。 /> </div> - + -我们实现了两个版本的ToolCallingAgent: -- [`ToolCallingAgent`]在其输出中生成JSON格式的工具调用。 -- [`CodeAgent`]是一种新型的ToolCallingAgent,它生成代码块形式的工具调用,这对于具有强大编码性能的LLM非常有效。 +我们实现了两个版本的 ToolCallingAgent: +- [`ToolCallingAgent`] 在其输出中生成 JSON 格式的工具调用。 +- [`CodeAgent`] 是一种新型的 ToolCallingAgent,它生成代码块形式的工具调用,这对于具有强大编码性能的 LLM 非常有效。 > [!TIP] -> 我们还提供了一个选项来以单步模式运行agent:只需在启动agent时传递`single_step=True`,例如`agent.run(your_task, single_step=True)` \ No newline at end of file +> 我们还提供了一个选项来以单步模式运行 agent:只需在启动 agent 时传递 `single_step=True`,例如 `agent.run(your_task, single_step=True)` \ No newline at end of file diff --git a/docs/source/zh/guided_tour.md b/docs/source/zh/guided_tour.md index ead192e37..07988fee0 100644 --- a/docs/source/zh/guided_tour.md +++ b/docs/source/zh/guided_tour.md @@ -17,33 +17,33 @@ rendered properly in your Markdown viewer. [[open-in-colab]] -在本导览中,您将学习如何构建一个agent(智能体),如何运行它,以及如何自定义它以使其更好地适应您的使用场景。 +在本导览中,您将学习如何构建一个 agent(智能体),如何运行它,以及如何自定义它以使其更好地适应您的使用场景。 > [!TIP] -> 译者注:Agent的业内术语是“智能体”。本译文将保留agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) +> 译者注:Agent 的业内术语是“智能体”。本译文将保留 agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) 
> [!TIP] -> 中文社区发布了关于smolagents的介绍和实践讲解视频(来源:[Issue#80](https://github.com/huggingface/smolagents/issues/80)),你可以访问[这里](https://www.youtube.com/watch?v=wwN3oAugc4c)进行观看! +> 中文社区发布了关于 smolagents 的介绍和实践讲解视频(来源:[Issue#80](https://github.com/huggingface/smolagents/issues/80)),你可以访问[这里](https://www.youtube.com/watch?v=wwN3oAugc4c)进行观看! -### 构建您的agent +### 构建您的 agent -要初始化一个最小化的agent,您至少需要以下两个参数: +要初始化一个最小化的 agent,您至少需要以下两个参数: -- `model`,一个为您的agent提供动力的文本生成模型 - 因为agent与简单的LLM不同,它是一个使用LLM作为引擎的系统。您可以使用以下任一选项: - - [`TransformersModel`] 使用预初始化的`transformers`管道在本地机器上运行推理 - - [`HfApiModel`] 在底层使用`huggingface_hub.InferenceClient` - - [`LiteLLMModel`] 让您通过[LiteLLM](https://docs.litellm.ai/)调用100+不同的模型! +- `model`,一个为您的 agent 提供动力的文本生成模型 - 因为 agent 与简单的 LLM 不同,它是一个使用 LLM 作为引擎的系统。您可以使用以下任一选项: + - [`TransformersModel`] 使用预初始化的 `transformers` 管道在本地机器上运行推理 + - [`HfApiModel`] 在底层使用 `huggingface_hub.InferenceClient` + - [`LiteLLMModel`] 让您通过 [LiteLLM](https://docs.litellm.ai/) 调用 100+ 不同的模型! -- `tools`,agent可以用来解决任务的`Tools`列表。它可以是一个空列表。您还可以通过定义可选参数`add_base_tools=True`在您的`tools`列表之上添加默认工具箱。 +- `tools`,agent 可以用来解决任务的 `Tools` 列表。它可以是一个空列表。您还可以通过定义可选参数 `add_base_tools=True` 在您的 `tools` 列表之上添加默认工具箱。 -一旦有了这两个参数`tools`和`model`,您就可以创建一个agent并运行它。您可以使用任何您喜欢的LLM,无论是通过[Hugging Face API](https://huggingface.co/docs/api-inference/en/index)、[transformers](https://github.com/huggingface/transformers/)、[ollama](https://ollama.com/),还是[LiteLLM](https://www.litellm.ai/)。 +一旦有了这两个参数 `tools` 和 `model`,您就可以创建一个 agent 并运行它。您可以使用任何您喜欢的 LLM,无论是通过 [Hugging Face API](https://huggingface.co/docs/api-inference/en/index)、[transformers](https://github.com/huggingface/transformers/)、[ollama](https://ollama.com/),还是 [LiteLLM](https://www.litellm.ai/)。 <hfoptions id="选择一个LLM"> <hfoption id="Hugging Face API"> -Hugging Face API可以免费使用而无需token,但会有速率限制。 +Hugging Face API 可以免费使用而无需 token,但会有速率限制。 -要访问受限模型或使用PRO账户提高速率限制,您需要设置环境变量`HF_TOKEN`或在初始化`HfApiModel`时传递`token`变量。 +要访问受限模型或使用 PRO 账户提高速率限制,您需要设置环境变量 `HF_TOKEN` 或在初始化 
`HfApiModel` 时传递 `token` 变量。 ```python from smolagents import CodeAgent, HfApiModel @@ -75,12 +75,12 @@ agent.run( </hfoption> <hfoption id="OpenAI或Anthropic API"> -要使用`LiteLLMModel`,您需要设置环境变量`ANTHROPIC_API_KEY`或`OPENAI_API_KEY`,或者在初始化时传递`api_key`变量。 +要使用 `LiteLLMModel`,您需要设置环境变量 `ANTHROPIC_API_KEY` 或 `OPENAI_API_KEY`,或者在初始化时传递 `api_key` 变量。 ```python from smolagents import CodeAgent, LiteLLMModel -model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", api_key="YOUR_ANTHROPIC_API_KEY") # 也可以使用'gpt-4o' +model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", api_key="YOUR_ANTHROPIC_API_KEY") # 也可以使用 'gpt-4o' agent = CodeAgent(tools=[], model=model, add_base_tools=True) agent.run( @@ -94,9 +94,9 @@ agent.run( from smolagents import CodeAgent, LiteLLMModel model = LiteLLMModel( - model_id="ollama_chat/llama3.2", # 这个模型对于agent行为来说有点弱 - api_base="http://localhost:11434", # 如果需要可以替换为远程open-ai兼容服务器 - api_key="YOUR_API_KEY" # 如果需要可以替换为API key + model_id="ollama_chat/llama3.2", # 这个模型对于 agent 行为来说有点弱 + api_base="http://localhost:11434", # 如果需要可以替换为远程 open-ai 兼容服务器 + api_key="YOUR_API_KEY" # 如果需要可以替换为 API key ) agent = CodeAgent(tools=[], model=model, add_base_tools=True) @@ -108,15 +108,15 @@ agent.run( </hfoption> </hfoptions> -#### CodeAgent和ToolCallingAgent +#### CodeAgent 和 ToolCallingAgent -[`CodeAgent`]是我们的默认agent。它将在每一步编写并执行Python代码片段。 +[`CodeAgent`] 是我们的默认 agent。它将在每一步编写并执行 Python 代码片段。 默认情况下,执行是在您的本地环境中完成的。 -这应该是安全的,因为唯一可以调用的函数是您提供的工具(特别是如果只有Hugging Face的工具)和一组预定义的安全函数,如`print`或`math`模块中的函数,所以您已经限制了可以执行的内容。 +这应该是安全的,因为唯一可以调用的函数是您提供的工具(特别是如果只有 Hugging Face 的工具)和一组预定义的安全函数,如 `print` 或 `math` 模块中的函数,所以您已经限制了可以执行的内容。 -Python解释器默认也不允许在安全列表之外导入,所以所有最明显的攻击都不应该成为问题。 -您可以通过在初始化[`CodeAgent`]时将授权模块作为字符串列表传递给参数`additional_authorized_imports`来授权额外的导入: +Python 解释器默认也不允许在安全列表之外导入,所以所有最明显的攻击都不应该成为问题。 +您可以通过在初始化 [`CodeAgent`] 时将授权模块作为字符串列表传递给参数 `additional_authorized_imports` 来授权额外的导入: ```py from smolagents import CodeAgent @@ -126,16 +126,16 @@ 
agent.run("Could you get me the title of the page at url 'https://huggingface.co ``` > [!WARNING] -> LLM可以生成任意代码然后执行:不要添加任何不安全的导入! +> LLM 可以生成任意代码然后执行:不要添加任何不安全的导入! -如果生成的代码尝试执行非法操作或出现常规Python错误,执行将停止。 +如果生成的代码尝试执行非法操作或出现常规 Python 错误,执行将停止。 -您也可以使用[E2B代码执行器](https://e2b.dev/docs#what-is-e2-b)而不是本地Python解释器,首先[设置`E2B_API_KEY`环境变量](https://e2b.dev/dashboard?tab=keys),然后在初始化agent时传递`use_e2b_executor=True`。 +您也可以使用 [E2B 代码执行器](https://e2b.dev/docs#what-is-e2-b) 而不是本地 Python 解释器,首先 [设置 `E2B_API_KEY` 环境变量](https://e2b.dev/dashboard?tab=keys),然后在初始化 agent 时传递 `use_e2b_executor=True`。 > [!TIP] -> 在[该教程中](tutorials/secure_code_execution)了解更多关于代码执行的内容。 +> 在 [该教程中](tutorials/secure_code_execution) 了解更多关于代码执行的内容。 -我们还支持广泛使用的将动作编写为JSON-like块的方式:[`ToolCallingAgent`],它的工作方式与[`CodeAgent`]非常相似,当然没有`additional_authorized_imports`,因为它不执行代码: +我们还支持广泛使用的将动作编写为 JSON-like 块的方式:[`ToolCallingAgent`],它的工作方式与 [`CodeAgent`] 非常相似,当然没有 `additional_authorized_imports`,因为它不执行代码: ```py from smolagents import ToolCallingAgent @@ -144,33 +144,33 @@ agent = ToolCallingAgent(tools=[], model=model) agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") ``` -### 检查agent运行 +### 检查 agent 运行 以下是一些有用的属性,用于检查运行后发生了什么: -- `agent.logs`存储agent的细粒度日志。在agent运行的每一步,所有内容都会存储在一个字典中,然后附加到`agent.logs`中。 -- 运行`agent.write_inner_memory_from_logs()`会为LLM创建一个agent日志的内部内存,作为聊天消息列表。此方法会遍历日志的每一步,并仅存储它感兴趣的内容作为消息:例如,它会将系统提示和任务存储为单独的消息,然后对于每一步,它会将LLM输出存储为一条消息,工具调用输出存储为另一条消息。如果您想要更高级别的视图 - 但不是每个日志都会被此方法转录。 +- `agent.logs` 存储 agent 的细粒度日志。在 agent 运行的每一步,所有内容都会存储在一个字典中,然后附加到 `agent.logs` 中。 +- 运行 `agent.write_inner_memory_from_logs()` 会为 LLM 创建一个 agent 日志的内部内存,作为聊天消息列表。此方法会遍历日志的每一步,并仅存储它感兴趣的内容作为消息:例如,它会将系统提示和任务存储为单独的消息,然后对于每一步,它会将 LLM 输出存储为一条消息,工具调用输出存储为另一条消息。如果您想要更高级别的视图,请使用此方法 - 但不是每个日志都会被此方法转录。 ## 工具 -工具是agent使用的原子函数。为了被LLM使用,它还需要一些构成其API的属性,这些属性将用于向LLM描述如何调用此工具: +工具是 agent 使用的原子函数。为了被 LLM 使用,它还需要一些构成其 API 的属性,这些属性将用于向 LLM 描述如何调用此工具: - 名称 - 描述 - 输入类型和描述 - 输出类型
-例如,您可以查看[`PythonInterpreterTool`]:它有一个名称、描述、输入描述、输出类型和一个执行操作的`forward`方法。 +例如,您可以查看 [`PythonInterpreterTool`]:它有一个名称、描述、输入描述、输出类型和一个执行操作的 `forward` 方法。 -当agent初始化时,工具属性用于生成工具描述,该描述被嵌入到agent的系统提示中。这让agent知道它可以使用哪些工具以及为什么。 +当 agent 初始化时,工具属性用于生成工具描述,该描述被嵌入到 agent 的系统提示中。这让 agent 知道它可以使用哪些工具以及为什么。 ### 默认工具箱 -Transformers附带了一个用于增强agent的默认工具箱,您可以在初始化时通过参数`add_base_tools = True`将其添加到您的agent中: +Transformers 附带了一个用于增强 agent 的默认工具箱,您可以在初始化时通过参数 `add_base_tools = True` 将其添加到您的 agent 中: -- **DuckDuckGo网页搜索**:使用DuckDuckGo浏览器执行网页搜索。 -- **Python代码解释器**:在安全环境中运行LLM生成的Python代码。只有在使用`add_base_tools=True`初始化[`ToolCallingAgent`]时才会添加此工具,因为基于代码的agent已经可以原生执行Python代码 -- **转录器**:基于Whisper-Turbo构建的语音转文本管道,将音频转录为文本。 +- **DuckDuckGo 网页搜索**:使用 DuckDuckGo 浏览器执行网页搜索。 +- **Python 代码解释器**:在安全环境中运行 LLM 生成的 Python 代码。只有在使用 `add_base_tools=True` 初始化 [`ToolCallingAgent`] 时才会添加此工具,因为基于代码的 agent 已经可以原生执行 Python 代码 +- **转录器**:基于 Whisper-Turbo 构建的语音转文本管道,将音频转录为文本。 -您可以通过调用[`load_tool`]函数和要执行的任务手动使用工具。 +您可以通过调用 [`load_tool`] 函数和要执行的任务手动使用工具。 ```python from smolagents import load_tool @@ -181,8 +181,8 @@ print(search_tool("Who's the current president of Russia?")) ### 创建一个新工具 -您可以创建自己的工具,用于Hugging Face默认工具未涵盖的用例。 -例如,让我们创建一个工具,返回Hub上给定任务下载量最多的模型。 +您可以创建自己的工具,用于 Hugging Face 默认工具未涵盖的用例。 +例如,让我们创建一个工具,返回 Hub 上给定任务下载量最多的模型。 您将从以下代码开始。 @@ -195,8 +195,8 @@ most_downloaded_model = next(iter(list_models(filter=task, sort="downloads", dir print(most_downloaded_model.id) ``` -这段代码可以通过将其包装在一个函数中并添加`tool`装饰器快速转换为工具: -这不是构建工具的唯一方法:您可以直接将其定义为[`Tool`]的子类,这为您提供了更多的灵活性,例如初始化重型类属性的可能性。 +这段代码可以通过将其包装在一个函数中并添加 `tool` 装饰器快速转换为工具: +这不是构建工具的唯一方法:您可以直接将其定义为 [`Tool`] 的子类,这为您提供了更多的灵活性,例如初始化重型类属性的可能性。 让我们看看这两种选项的工作原理: @@ -220,13 +220,13 @@ def model_download_tool(task: str) -> str: ``` 该函数需要: -- 一个清晰的名称。名称应该足够描述此工具的功能,以帮助为agent提供动力的LLM。由于此工具返回任务下载量最多的模型,我们将其命名为`model_download_tool`。 +- 一个清晰的名称。名称应该足够描述此工具的功能,以帮助为 agent 提供动力的 LLM。由于此工具返回任务下载量最多的模型,我们将其命名为 `model_download_tool`。 - 输入和输出的类型提示 -- 
一个描述,其中包括一个'Args:'部分,其中每个参数都被描述(这次没有类型指示,它将从类型提示中提取)。与工具名称一样,此描述是为您的agent提供动力的LLM的说明书,所以不要忽视它。 -所有这些元素将在初始化时自动嵌入到agent的系统提示中:因此要努力使它们尽可能清晰! +- 一个描述,其中包括一个 'Args:' 部分,其中每个参数都被描述(这次没有类型指示,它将从类型提示中提取)。与工具名称一样,此描述是为您的 agent 提供动力的 LLM 的说明书,所以不要忽视它。 +所有这些元素将在初始化时自动嵌入到 agent 的系统提示中:因此要努力使它们尽可能清晰! > [!TIP] -> 此定义格式与`apply_chat_template`中使用的工具模式相同,唯一的区别是添加了`tool`装饰器:[这里](https://huggingface.co/blog/unified-tool-use#passing-tools-to-a-chat-template)了解更多关于我们的工具使用API。 +> 此定义格式与 `apply_chat_template` 中使用的工具模式相同,唯一的区别是添加了 `tool` 装饰器:在 [这里](https://huggingface.co/blog/unified-tool-use#passing-tools-to-a-chat-template) 了解更多关于我们的工具使用 API 的内容。 </hfoption> <hfoption id="子类化Tool"> @@ -245,16 +245,16 @@ class ModelDownloadTool(Tool): ``` 子类需要以下属性: -- 一个清晰的`name`。名称应该足够描述此工具的功能,以帮助为agent提供动力的LLM。由于此工具返回任务下载量最多的模型,我们将其命名为`model_download_tool`。 -- 一个`description`。与`name`一样,此描述是为您的agent提供动力的LLM的说明书,所以不要忽视它。 +- 一个清晰的 `name`。名称应该足够描述此工具的功能,以帮助为 agent 提供动力的 LLM。由于此工具返回任务下载量最多的模型,我们将其命名为 `model_download_tool`。 +- 一个 `description`。与 `name` 一样,此描述是为您的 agent 提供动力的 LLM 的说明书,所以不要忽视它。 - 输入类型和描述 - 输出类型 -所有这些属性将在初始化时自动嵌入到agent的系统提示中:因此要努力使它们尽可能清晰! +所有这些属性将在初始化时自动嵌入到 agent 的系统提示中:因此要努力使它们尽可能清晰! </hfoption> </hfoptions> -然后您可以直接初始化您的agent: +然后您可以直接初始化您的 agent: ```py from smolagents import CodeAgent, HfApiModel agent = CodeAgent(tools=[model_download_tool], model=HfApiModel()) @@ -271,7 +271,7 @@ agent.run( │ task on the Hugging Face Hub?
│ │ │ ╰─ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct ───────────────────────────────────────────╯ -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 0 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 0 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮ │ 1 model_name = model_download_tool(task="text-to-video") │ │ 2 print(model_name) │ @@ -281,7 +281,7 @@ ByteDance/AnimateDiff-Lightning Out: None [Step 0: Duration 0.27 seconds| Input tokens: 2,069 | Output tokens: 60] -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ╭─ Executing this code: ───────────────────────────────────────────────────────────────────╮ │ 1 final_answer("ByteDance/AnimateDiff-Lightning") │ ╰──────────────────────────────────────────────────────────────────────────────────────────╯ @@ -291,20 +291,20 @@ Out[20]: 'ByteDance/AnimateDiff-Lightning' ``` > [!TIP] -> 在[专用教程](./tutorials/tools#what-is-a-tool-and-how-to-build-one)中了解更多关于工具的内容。 +> 在 [专用教程](./tutorials/tools#what-is-a-tool-and-how-to-build-one) 中了解更多关于工具的内容。 -## 多agent +## 多 agent -多agent系统是随着微软的框架[Autogen](https://huggingface.co/papers/2308.08155)引入的。 +多 agent 系统是随着微软的框架 [Autogen](https://huggingface.co/papers/2308.08155) 引入的。 -在这种类型的框架中,您有多个agent一起工作来解决您的任务,而不是只有一个。 -经验表明,这在大多数基准测试中表现更好。这种更好表现的原因在概念上很简单:对于许多任务,与其使用一个全能系统,您更愿意将单元专门用于子任务。在这里,拥有具有单独工具集和内存的agent可以实现高效的专业化。例如,为什么要用网页搜索agent访问的所有网页内容填充代码生成agent的内存?最好将它们分开。 +在这种类型的框架中,您有多个 agent 一起工作来解决您的任务,而不是只有一个。 +经验表明,这在大多数基准测试中表现更好。这种更好表现的原因在概念上很简单:对于许多任务,与其使用一个全能系统,您更愿意将单元专门用于子任务。在这里,拥有具有单独工具集和内存的 agent 可以实现高效的专业化。例如,为什么要用网页搜索 agent 访问的所有网页内容填充代码生成 agent 的内存?最好将它们分开。 -您可以使用`smolagents`轻松构建分层多agent系统。 +您可以使用 `smolagents` 轻松构建分层多 agent 系统。 
-为此,将agent封装在[`ManagedAgent`]对象中。此对象需要参数`agent`、`name`和`description`,这些参数将嵌入到管理agent的系统提示中,以让它知道如何调用此托管agent,就像我们对工具所做的那样。 +为此,将 agent 封装在 [`ManagedAgent`] 对象中。此对象需要参数 `agent`、`name` 和 `description`,这些参数将嵌入到管理 agent 的系统提示中,以让它知道如何调用此托管 agent,就像我们对工具所做的那样。 -以下是一个使用我们的[`DuckDuckGoSearchTool`]制作一个管理特定网页搜索agent的agent的示例: +以下是一个使用我们的 [`DuckDuckGoSearchTool`] 制作一个管理特定网页搜索 agent 的 agent 的示例: ```py from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, ManagedAgent @@ -327,12 +327,12 @@ manager_agent.run("Who is the CEO of Hugging Face?") ``` > [!TIP] -> 有关高效多agent实现的深入示例,请参阅[我们如何将多agent系统推向GAIA排行榜的顶部](https://huggingface.co/blog/beating-gaia)。 +> 有关高效多 agent 实现的深入示例,请参阅 [我们如何将多 agent 系统推向 GAIA 排行榜的顶部](https://huggingface.co/blog/beating-gaia)。 -## 与您的agent交谈并在酷炫的Gradio界面中可视化其思考过程 +## 与您的 agent 交谈并在酷炫的 Gradio 界面中可视化其思考过程 -您可以使用`GradioUI`交互式地向您的agent提交任务并观察其思考和执行过程,以下是一个示例: +您可以使用 `GradioUI` 交互式地向您的 agent 提交任务并观察其思考和执行过程,以下是一个示例: ```py from smolagents import ( @@ -342,25 +342,25 @@ from smolagents import ( GradioUI ) -# 从Hub导入工具 +# 从 Hub 导入工具 image_generation_tool = load_tool("m-ric/text-to-image") model = HfApiModel(model_id) -# 使用图像生成工具初始化agent +# 使用图像生成工具初始化 agent agent = CodeAgent(tools=[image_generation_tool], model=model) GradioUI(agent).launch() ``` -在底层,当用户输入新答案时,agent会以`agent.run(user_request, reset=False)`启动。 -`reset=False`标志意味着在启动此新任务之前不会刷新agent的内存,这使得对话可以继续。 +在底层,当用户输入新答案时,agent 会以 `agent.run(user_request, reset=False)` 启动。 +`reset=False` 标志意味着在启动此新任务之前不会刷新 agent 的内存,这使得对话可以继续。 -您也可以在其他agent化应用程序中使用此`reset=False`参数来保持对话继续。 +您也可以在其他 agent 化应用程序中使用此 `reset=False` 参数来保持对话继续。 ## 下一步 要更深入地使用,您将需要查看我们的教程: -- [我们的代码agent如何工作的解释](./tutorials/secure_code_execution) -- [本指南关于如何构建好的agent](./tutorials/building_good_agents)。 +- [我们的代码 agent 如何工作的解释](./tutorials/secure_code_execution) +- [本指南关于如何构建好的 agent](./tutorials/building_good_agents)。 - [工具使用的深入指南](./tutorials/tools)。 diff --git a/docs/source/zh/index.md b/docs/source/zh/index.md index 6d6f1d65e..d79e8090c 
100644 --- a/docs/source/zh/index.md +++ b/docs/source/zh/index.md @@ -15,18 +15,18 @@ rendered properly in your Markdown viewer. # `smolagents` -这是构建强大agent的最简单框架!顺便问一下,什么是"agent"?我们在[此页面](conceptual_guides/intro_agents)提供了我们的定义,您还可以找到关于何时使用或不使用它们的建议(剧透:通常不使用agent会更好)。 +这是构建强大 agent 的最简单框架!顺便问一下,什么是 "agent"?我们在[此页面](conceptual_guides/intro_agents)提供了我们的定义,您还可以找到关于何时使用或不使用它们的建议(剧透:通常不使用 agent 会更好)。 > [!TIP] -> 译者注:Agent的业内术语是“智能体”。本译文将保留agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) +> 译者注:Agent 的业内术语是“智能体”。本译文将保留 agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) 本库提供: -✨ **简洁性**:Agent逻辑仅需约千行代码。我们将抽象保持在原始代码之上的最小形态! +✨ **简洁性**:Agent 逻辑仅需约千行代码。我们将抽象保持在原始代码之上的最小形态! -🌐 **支持任何 LLM**:支持通过 Hub 托管的模型,使用其 `transformers` 版本或通过我们的推理 API 加载,也支持 OpenAI、Anthropic 等模型。使用任何 LLM 为agent提供动力都非常容易。 +🌐 **支持任何 LLM**:支持通过 Hub 托管的模型,使用其 `transformers` 版本或通过我们的推理 API 加载,也支持 OpenAI、Anthropic 等模型。使用任何 LLM 为 agent 提供动力都非常容易。 -🧑💻 **一流的代码agent支持**,即编写代码作为其操作的agent(与"用于编写代码的agent"相对),[在此了解更多](tutorials/secure_code_execution)。 +🧑💻 **一流的代码 agent 支持**,即编写代码作为其操作的 agent(与"用于编写代码的 agent"相对),[在此了解更多](tutorials/secure_code_execution)。 🤗 **Hub 集成**:您可以在 Hub 上共享和加载工具,更多功能即将推出! @@ -34,11 +34,11 @@ rendered properly in your Markdown viewer. 
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5"> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./guided_tour" ><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">导览</div> - <p class="text-gray-700">学习基础知识并熟悉使用agent。如果您是第一次使用agent,请从这里开始!</p> + <p class="text-gray-700">学习基础知识并熟悉使用 agent。如果您是第一次使用 agent,请从这里开始!</p> </a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./examples/text_to_sql" ><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">操作指南</div> - <p class="text-gray-700">实用指南,帮助您实现特定目标:创建一个生成和测试 SQL 查询的agent!</p> + <p class="text-gray-700">实用指南,帮助您实现特定目标:创建一个生成和测试 SQL 查询的 agent!</p> </a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./conceptual_guides/intro_agents" ><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">概念指南</div> @@ -46,7 +46,7 @@ rendered properly in your Markdown viewer. 
</a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./tutorials/building_good_agents" ><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">教程</div> - <p class="text-gray-700">涵盖构建agent重要方面的横向教程。</p> + <p class="text-gray-700">涵盖构建 agent 重要方面的横向教程。</p> </a> </div> </div> diff --git a/docs/source/zh/tutorials/building_good_agents.md b/docs/source/zh/tutorials/building_good_agents.md index 336f873e3..47cd202a0 100644 --- a/docs/source/zh/tutorials/building_good_agents.md +++ b/docs/source/zh/tutorials/building_good_agents.md @@ -13,47 +13,47 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -# 构建好用的agent +# 构建好用的 agent [[open-in-colab]] -能良好工作的agent和不能工作的agent之间,有天壤之别。 -我们怎么样才能构建出属于前者的agent呢? -在本指南中,我们将看到构建agent的最佳实践。 +能良好工作的 agent 和不能工作的 agent 之间,有天壤之别。 +我们怎么样才能构建出属于前者的 agent 呢? +在本指南中,我们将看到构建 agent 的最佳实践。 > [!TIP] -> 如果你是agent构建的新手,请确保首先阅读[agent介绍](../conceptual_guides/intro_agents)和[smolagents导览](../guided_tour)。 +> 如果你是 agent 构建的新手,请确保首先阅读 [agent 介绍](../conceptual_guides/intro_agents) 和 [smolagents 导览](../guided_tour)。 -### 最好的agent系统是最简单的:尽可能简化工作流 +### 最好的 agent 系统是最简单的:尽可能简化工作流 -在你的工作流中赋予LLM一些自主权,会引入一些错误风险。 +在你的工作流中赋予 LLM 一些自主权,会引入一些错误风险。 -经过良好编程的agent系统,通常具有良好的错误日志记录和重试机制,因此LLM引擎有机会自我纠错。但为了最大限度地降低LLM错误的风险,你应该简化你的工作流! +经过良好编程的 agent 系统,通常具有良好的错误日志记录和重试机制,因此 LLM 引擎有机会自我纠错。但为了最大限度地降低 LLM 错误的风险,你应该简化你的工作流! -让我们回顾一下[agent介绍](../conceptual_guides/intro_agents)中的例子:一个为冲浪旅行公司回答用户咨询的机器人。 -与其让agent每次被问及新的冲浪地点时,都分别调用"旅行距离API"和"天气API",你可以只创建一个统一的工具"return_spot_information",一个同时调用这两个API,并返回它们连接输出的函数。 +让我们回顾一下 [agent 介绍](../conceptual_guides/intro_agents) 中的例子:一个为冲浪旅行公司回答用户咨询的机器人。 +与其让 agent 每次被问及新的冲浪地点时,都分别调用 "旅行距离 API" 和 "天气 API",你可以只创建一个统一的工具 "return_spot_information",一个同时调用这两个 API,并返回它们连接输出的函数。 这可以降低成本、延迟和错误风险! 
-主要的指导原则是:尽可能减少LLM调用的次数。 +主要的指导原则是:尽可能减少 LLM 调用的次数。 这可以带来一些启发: -- 尽可能把两个工具合并为一个,就像我们两个API的例子。 -- 尽可能基于确定性函数,而不是agent决策,来实现逻辑。 +- 尽可能把两个工具合并为一个,就像我们两个 API 的例子。 +- 尽可能基于确定性函数,而不是 agent 决策,来实现逻辑。 -### 改善流向LLM引擎的信息流 +### 改善流向 LLM 引擎的信息流 -记住,你的LLM引擎就像一个~智能~机器人,被关在一个房间里,与外界唯一的交流方式是通过门缝传递的纸条。 +记住,你的 LLM 引擎就像一个 ~智能~ 机器人,被关在一个房间里,与外界唯一的交流方式是通过门缝传递的纸条。 如果你没有明确地将信息放入其提示中,它将不知道发生的任何事情。 所以首先要让你的任务非常清晰! -由于agent由LLM驱动,任务表述的微小变化可能会产生完全不同的结果。 +由于 agent 由 LLM 驱动,任务表述的微小变化可能会产生完全不同的结果。 -然后,改善工具使用中流向agent的信息流。 +然后,改善工具使用中流向 agent 的信息流。 需要遵循的具体指南: -- 每个工具都应该记录(只需在工具的`forward`方法中使用`print`语句)对LLM引擎可能有用的所有信息。 +- 每个工具都应该记录(只需在工具的 `forward` 方法中使用 `print` 语句)对 LLM 引擎可能有用的所有信息。 - 特别是,记录工具执行错误的详细信息会很有帮助! 例如,这里有一个根据位置和日期时间检索天气数据的工具: @@ -64,7 +64,7 @@ import datetime from smolagents import tool def get_weather_report_at_coordinates(coordinates, date_time): - # 虚拟函数,返回[温度(°C),降雨风险(0-1),浪高(m)] + # 虚拟函数,返回 [温度(°C),降雨风险(0-1),浪高(m)] return [28.0, 0.35, 0.85] def get_coordinates_from_location(location): @@ -86,12 +86,12 @@ def get_weather_api(location: str, date_time: str) -> str: ``` 为什么它不好? -- 没有说明`date_time`应该使用的格式 +- 没有说明 `date_time` 应该使用的格式 - 没有说明位置应该如何指定 -- 没有记录机制来处理明确的报错情况,如位置格式不正确或date_time格式不正确 +- 没有记录机制来处理明确的报错情况,如位置格式不正确或 date_time 格式不正确 - 输出格式难以理解 -如果工具调用失败,内存中记录的错误跟踪,可以帮助LLM逆向工程工具来修复错误。但为什么要让它做这么多繁重的工作呢? +如果工具调用失败,内存中记录的错误跟踪,可以帮助 LLM 逆向工程工具来修复错误。但为什么要让它做这么多繁重的工作呢? 构建这个工具的更好方式如下: ```python @@ -113,11 +113,11 @@ def get_weather_api(location: str, date_time: str) -> str: return f"Weather report for {location}, {date_time}: Temperature will be {temperature_celsius}°C, risk of rain is {risk_of_rain*100:.0f}%, wave height is {wave_height}m." 
``` -一般来说,为了减轻LLM的负担,要问自己的好问题是:"如果我是一个第一次使用这个工具的傻瓜,使用这个工具编程并纠正自己的错误有多容易?"。 +一般来说,为了减轻 LLM 的负担,要问自己的好问题是:"如果我是一个第一次使用这个工具的傻瓜,使用这个工具编程并纠正自己的错误有多容易?"。 -### 给agent更多参数 +### 给 agent 更多参数 -除了简单的任务描述字符串外,你还可以使用`additional_args`参数传递任何类型的对象: +除了简单的任务描述字符串外,你还可以使用 `additional_args` 参数传递任何类型的对象: ```py from smolagents import CodeAgent, HfApiModel @@ -131,19 +131,19 @@ agent.run( additional_args={"mp3_sound_file_url":'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3'} ) ``` -例如,你可以使用这个`additional_args`参数传递你希望agent利用的图像或字符串。 +例如,你可以使用这个 `additional_args` 参数传递你希望 agent 利用的图像或字符串。 -## 如何调试你的agent +## 如何调试你的 agent -### 1. 使用更强大的LLM +### 1. 使用更强大的 LLM -在agent工作流中,有些错误是实际错误,有些则是你的LLM引擎没有正确推理的结果。 -例如,参考这个我要求创建一个汽车图片的`CodeAgent`的运行记录: +在 agent 工作流中,有些错误是实际错误,有些则是你的 LLM 引擎没有正确推理的结果。 +例如,参考这个我要求创建一个汽车图片的 `CodeAgent` 的运行记录: ```text ==================================================================================================== New task ==================================================================================================== Make me a cool car picture -──────────────────────────────────────────────────────────────────────────────────────────────────── New step ──────────────────────────────────────────────────────────────────────────────────────────────────── +──────────────────────────────────────────────────────────────────────────────────────────────────── New step ───────────────────────────────────────────────────────────────────────────────────────────────────── Agent is executing the code below: ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── image_generator(prompt="A cool, futuristic sports car with LED headlights, aerodynamic design, and vibrant color, high-res, photorealistic") 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── @@ -155,7 +155,7 @@ Step 1: - Time taken: 16.35 seconds - Input tokens: 1,383 - Output tokens: 77 -──────────────────────────────────────────────────────────────────────────────────────────────────── New step ──────────────────────────────────────────────────────────────────────────────────────────────────── +──────────────────────────────────────────────────────────────────────────────────────────────────── New step ───────────────────────────────────────────────────────────────────────────────────────────────────── Agent is executing the code below: ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── final_answer("/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png") ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── @@ -167,10 +167,10 @@ Final answer: /var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png ``` 用户看到的是返回了一个路径,而不是图像。 -这看起来像是系统的错误,但实际上agent系统并没有导致错误:只是LLM大脑犯了一个错误,没有把图像输出,保存到变量中。 +这看起来像是系统的错误,但实际上 agent 系统并没有导致错误:只是 LLM 大脑犯了一个错误,没有把图像输出,保存到变量中。 因此,它无法再次访问图像,只能利用保存图像时记录的路径,所以它返回的是路径,而不是图像。 -调试agent的第一步是"使用更强大的LLM"。像`Qwen2.5-72B-Instruct`这样的替代方案不会犯这种错误。 +调试 agent 的第一步是"使用更强大的 LLM"。像 `Qwen2.5-72B-Instruct` 这样的替代方案不会犯这种错误。 ### 2. 提供更多指导/更多信息 @@ -181,16 +181,16 @@ Final answer: 你需要一些额外的说明吗? 
为了提供额外信息,我们不建议立即更改系统提示:默认系统提示有许多调整,除非你非常了解提示,否则你很容易翻车。 -更好的指导LLM引擎的方法是: +更好的指导 LLM 引擎的方法是: - 如果是关于要解决的任务:把所有细节添加到任务中。任务可以有几百页长。 -- 如果是关于如何使用工具:你的工具的description属性。 +- 如果是关于如何使用工具:你的工具的 description 属性。 ### 3. 更改系统提示(通常不建议) 如果上述说明不够,你可以更改系统提示。 -让我们看看它是如何工作的。例如,让我们检查[`CodeAgent`]的默认系统提示(下面的版本通过跳过零样本示例进行了缩短)。 +让我们看看它是如何工作的。例如,让我们检查 [`CodeAgent`] 的默认系统提示(下面的版本通过跳过零样本示例进行了缩短)。 ```python print(agent.system_prompt_template) @@ -232,12 +232,12 @@ Here are the rules you should always follow to solve your task: Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. ``` -如你所见,有一些占位符,如`"{{tool_descriptions}}"`:这些将在agent初始化时用于插入某些自动生成的工具或管理agent的描述。 +如你所见,有一些占位符,如 `"{{tool_descriptions}}"`:这些将在 agent 初始化时用于插入某些自动生成的工具或管理 agent 的描述。 -因此,虽然你可以通过将自定义提示作为参数传递给`system_prompt`参数来覆盖此系统提示模板,但你的新系统提示必须包含以下占位符: +因此,虽然你可以通过将自定义提示作为参数传递给 `system_prompt` 参数来覆盖此系统提示模板,但你的新系统提示必须包含以下占位符: - `"{{tool_descriptions}}"` 用于插入工具描述。 -- `"{{managed_agents_description}}"` 用于插入managed agent的描述(如果有)。 -- 仅限`CodeAgent`:`"{{authorized_imports}}"` 用于插入授权导入列表。 +- `"{{managed_agents_description}}"` 用于插入 managed agent 的描述(如果有)。 +- 仅限 `CodeAgent`:`"{{authorized_imports}}"` 用于插入授权导入列表。 然后你可以根据如下,更改系统提示: @@ -253,12 +253,12 @@ agent = CodeAgent( ) ``` -这也适用于[`ToolCallingAgent`]。 +这也适用于 [`ToolCallingAgent`]。 ### 4. 
额外规划 -我们提供了一个用于补充规划步骤的模型,agent可以在正常操作步骤之间定期运行。在此步骤中,没有工具调用,LLM只是被要求更新它知道的事实列表,并根据这些事实反推它应该采取的下一步。 +我们提供了一个用于补充规划步骤的模型,agent 可以在正常操作步骤之间定期运行。在此步骤中,没有工具调用,LLM 只是被要求更新它知道的事实列表,并根据这些事实反推它应该采取的下一步。 ```py from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool @@ -266,7 +266,7 @@ from dotenv import load_dotenv load_dotenv() -# 从Hub导入工具 +# 从 Hub 导入工具 image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True) search_tool = DuckDuckGoSearchTool() diff --git a/docs/source/zh/tutorials/secure_code_execution.md b/docs/source/zh/tutorials/secure_code_execution.md index 130ccd8e4..6017aefb9 100644 --- a/docs/source/zh/tutorials/secure_code_execution.md +++ b/docs/source/zh/tutorials/secure_code_execution.md @@ -18,30 +18,30 @@ rendered properly in your Markdown viewer. [[open-in-colab]] > [!TIP] -> 如果你是第一次构建agent,请先阅读[agent介绍](../conceptual_guides/intro_agents)和[smolagents 导览](../guided_tour)。 +> 如果你是第一次构建 agent,请先阅读 [agent 介绍](../conceptual_guides/intro_agents) 和 [smolagents 导览](../guided_tour)。 ### 代码智能体 -[多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [表明](https://huggingface.co/papers/2401.00812),让大语言模型用代码编写其动作(工具调用)比当前标准的工具调用格式要好得多,目前行业标准是"将动作写成包含工具名称和参数的JSON"的各种变体。 +[多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [表明](https://huggingface.co/papers/2401.00812),让大语言模型用代码编写其动作(工具调用)比当前标准的工具调用格式要好得多,目前行业标准是 "将动作写成包含工具名称和参数的 JSON" 的各种变体。 -为什么代码更好?因为我们专门为计算机执行的动作而设计编程语言。如果JSON片段是更好的方式,那么这个工具包就应该是用JSON片段编写的,魔鬼就会嘲笑我们。 +为什么代码更好?因为我们专门为计算机执行的动作而设计编程语言。如果 JSON 片段是更好的方式,那么这个工具包就应该是用 JSON 片段编写的,魔鬼就会嘲笑我们。 代码就是表达计算机动作的更好方式。它具有更好的: -- **组合性**:你能像定义Python函数那样,在JSON动作中嵌套其他JSON动作,或者定义一组JSON动作以便以后重用吗? -- **对象管理**:你如何在JSON中存储像`generate_image`这样的动作的输出? +- **组合性**:你能像定义 Python 函数那样,在 JSON 动作中嵌套其他 JSON 动作,或者定义一组 JSON 动作以便以后重用吗? +- **对象管理**:你如何在 JSON 中存储像 `generate_image` 这样的动作的输出? 
- **通用性**:代码是为了简单地表达任何可以让计算机做的事情而构建的。 -- **在LLM训练语料库中的表示**:天赐良机,为什么不利用已经包含在LLM训练语料库中的大量高质量动作呢? +- **在 LLM 训练语料库中的表示**:天赐良机,为什么不利用已经包含在 LLM 训练语料库中的大量高质量动作呢? -下图展示了这一点,取自[可执行代码动作引出更好的LLM智能体](https://huggingface.co/papers/2402.01030)。 +下图展示了这一点,取自 [可执行代码动作引出更好的 LLM 智能体](https://huggingface.co/papers/2402.01030)。 <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/code_vs_json_actions.png"> -这就是为什么我们强调提出代码智能体,在本例中是Python智能体,这意味着我们要在构建安全的Python解释器上投入更多精力。 +这就是为什么我们强调提出代码智能体,在本例中是 Python 智能体,这意味着我们要在构建安全的 Python 解释器上投入更多精力。 -### 本地Python解释器 +### 本地 Python 解释器 -默认情况下,`CodeAgent`会在你的环境中运行LLM生成的代码。 -这个执行不是由普通的Python解释器完成的:我们从零开始重新构建了一个更安全的`LocalPythonInterpreter`。 +默认情况下,`CodeAgent` 会在你的环境中运行 LLM 生成的代码。 +这个执行不是由普通的 Python 解释器完成的:我们从零开始重新构建了一个更安全的 `LocalPythonInterpreter`。 这个解释器通过以下方式设计以确保安全: - 将导入限制为用户显式传递的列表 - 限制操作次数以防止无限循环和资源膨胀 @@ -49,23 +49,23 @@ rendered properly in your Markdown viewer. 我们已经在许多用例中使用了这个解释器,从未观察到对环境造成任何损害。 -然而,这个解决方案并不是万无一失的:可以想象,如果LLM被微调用于恶意操作,仍然可能损害你的环境。例如,如果你允许像`Pillow`这样无害的包处理图像,LLM可能会生成数千张图像保存以膨胀你的硬盘。 -如果你自己选择了LLM引擎,这当然不太可能,但它可能会发生。 +然而,这个解决方案并不是万无一失的:可以想象,如果 LLM 被微调用于恶意操作,仍然可能损害你的环境。例如,如果你允许像 `Pillow` 这样无害的包处理图像,LLM 可能会生成数千张图像保存以膨胀你的硬盘。 +如果你自己选择了 LLM 引擎,这当然不太可能,但它可能会发生。 所以如果你想格外谨慎,可以使用下面描述的远程代码执行选项。 -### E2B代码执行器 +### E2B 代码执行器 -为了最大程度的安全性,你可以使用我们与E2B的集成在沙盒环境中运行代码。这是一个远程执行服务,可以在隔离的容器中运行你的代码,使代码无法影响你的本地环境。 +为了最大程度的安全性,你可以使用我们与 E2B 的集成在沙盒环境中运行代码。这是一个远程执行服务,可以在隔离的容器中运行你的代码,使代码无法影响你的本地环境。 -为此,你需要设置你的E2B账户并在环境变量中设置`E2B_API_KEY`。请前往[E2B快速入门文档](https://e2b.dev/docs/quickstart)了解更多信息。 +为此,你需要设置你的 E2B 账户并在环境变量中设置 `E2B_API_KEY`。请前往 [E2B 快速入门文档](https://e2b.dev/docs/quickstart) 了解更多信息。 -然后你可以通过`pip install e2b-code-interpreter python-dotenv`安装它。 +然后你可以通过 `pip install e2b-code-interpreter python-dotenv` 安装它。 现在你已经准备好了! 
-要将代码执行器设置为E2B,只需在初始化`CodeAgent`时传递标志`use_e2b_executor=True`。 -请注意,你应该将所有工具的依赖项添加到`additional_authorized_imports`中,以便执行器安装它们。 +要将代码执行器设置为 E2B,只需在初始化 `CodeAgent` 时传递标志 `use_e2b_executor=True`。 +请注意,你应该将所有工具的依赖项添加到 `additional_authorized_imports` 中,以便执行器安装它们。 ```py from smolagents import CodeAgent, VisitWebpageTool, HfApiModel @@ -79,4 +79,4 @@ agent = CodeAgent( agent.run("What was Abraham Lincoln's preferred pet?") ``` -目前E2B代码执行暂不兼容多agent——因为把agent调用放在应该在远程执行的代码块里,是非常混乱的。但我们正在努力做到这件事! +目前 E2B 代码执行暂不兼容多 agent——因为把 agent 调用放在应该在远程执行的代码块里,是非常混乱的。但我们正在努力做到这件事! diff --git a/docs/source/zh/tutorials/tools.md b/docs/source/zh/tutorials/tools.md index e87854a95..216d93b96 100644 --- a/docs/source/zh/tutorials/tools.md +++ b/docs/source/zh/tutorials/tools.md @@ -20,25 +20,25 @@ rendered properly in your Markdown viewer. 在这里,我们将学习高级工具的使用。 > [!TIP] -> 如果你是构建agent的新手,请确保先阅读[agent介绍](../conceptual_guides/intro_agents)和[smolagents导览](../guided_tour)。 +> 如果你是构建 agent 的新手,请确保先阅读 [agent 介绍](../conceptual_guides/intro_agents) 和 [smolagents 导览](../guided_tour)。 - [工具](#工具) - [什么是工具,如何构建一个工具?](#什么是工具如何构建一个工具) - - [将你的工具分享到Hub](#将你的工具分享到hub) - - [将Space导入为工具](#将space导入为工具) - - [使用LangChain工具](#使用langchain工具) - - [管理你的agent工具箱](#管理你的agent工具箱) + - [将你的工具分享到 Hub](#将你的工具分享到-hub) + - [将 Space 导入为工具](#将-space-导入为工具) + - [使用 LangChain 工具](#使用-langchain-工具) + - [管理你的 agent 工具箱](#管理你的-agent-工具箱) - [使用工具集合](#使用工具集合) ### 什么是工具,如何构建一个工具? 
-工具主要是LLM可以在agent系统中使用的函数。 +工具主要是 LLM 可以在 agent 系统中使用的函数。 -但要使用它,LLM需要被提供一个API:名称、工具描述、输入类型和描述、输出类型。 +但要使用它,LLM 需要被提供一个 API:名称、工具描述、输入类型和描述、输出类型。 所以它不能仅仅是一个函数。它应该是一个类。 -因此,核心上,工具是一个类,它包装了一个函数,并带有帮助LLM理解如何使用它的元数据。 +因此,核心上,工具是一个类,它包装了一个函数,并带有帮助 LLM 理解如何使用它的元数据。 以下是它的结构: @@ -67,38 +67,38 @@ class HFModelDownloadsTool(Tool): model_downloads_tool = HFModelDownloadsTool() ``` -自定义工具继承[`Tool`]以继承有用的方法。子类还定义了: -- 一个属性`name`,对应于工具本身的名称。名称通常描述工具的功能。由于代码返回任务中下载量最多的模型,我们将其命名为`model_download_counter`。 -- 一个属性`description`,用于填充agent的系统提示。 -- 一个`inputs`属性,它是一个带有键`"type"`和`"description"`的字典。它包含帮助Python解释器对输入做出明智选择的信息。 -- 一个`output_type`属性,指定输出类型。`inputs`和`output_type`的类型应为[Pydantic格式](https://docs.pydantic.dev/latest/concepts/json_schema/#generating-json-schema),它们可以是以下之一:[`~AUTHORIZED_TYPES`]。 -- 一个`forward`方法,包含要执行的推理代码。 +自定义工具继承 [`Tool`] 以继承有用的方法。子类还定义了: +- 一个属性 `name`,对应于工具本身的名称。名称通常描述工具的功能。由于代码返回任务中下载量最多的模型,我们将其命名为 `model_download_counter`。 +- 一个属性 `description`,用于填充 agent 的系统提示。 +- 一个 `inputs` 属性,它是一个带有键 `"type"` 和 `"description"` 的字典。它包含帮助 Python 解释器对输入做出明智选择的信息。 +- 一个 `output_type` 属性,指定输出类型。`inputs` 和 `output_type` 的类型应为 [Pydantic 格式](https://docs.pydantic.dev/latest/concepts/json_schema/#generating-json-schema),它们可以是以下之一:[`~AUTHORIZED_TYPES`]。 +- 一个 `forward` 方法,包含要执行的推理代码。 -这就是它在agent中使用所需的全部内容! +这就是它在 agent 中使用所需的全部内容! 
-还有另一种构建工具的方法。在[guided_tour](../guided_tour)中,我们使用`@tool`装饰器实现了一个工具。[`tool`]装饰器是定义简单工具的推荐方式,但有时你需要更多:在类中使用多个方法以获得更清晰的代码,或使用额外的类属性。 +还有另一种构建工具的方法。在 [guided_tour](../guided_tour) 中,我们使用 `@tool` 装饰器实现了一个工具。[`tool`] 装饰器是定义简单工具的推荐方式,但有时你需要更多:在类中使用多个方法以获得更清晰的代码,或使用额外的类属性。 -在这种情况下,你可以通过如上所述继承[`Tool`]来构建你的工具。 +在这种情况下,你可以通过如上所述继承 [`Tool`] 来构建你的工具。 -### 将你的工具分享到Hub +### 将你的工具分享到 Hub -你可以通过调用[`~Tool.push_to_hub`]将你的自定义工具分享到Hub。确保你已经在Hub上为其创建了一个仓库,并且使用的是具有读取权限的token。 +你可以通过调用 [`~Tool.push_to_hub`] 将你的自定义工具分享到 Hub。确保你已经在 Hub 上为其创建了一个仓库,并且使用的是具有写入权限的 token。 ```python model_downloads_tool.push_to_hub("{your_username}/hf-model-downloads", token="<YOUR_HUGGINGFACEHUB_API_TOKEN>") ``` -为了使推送到Hub正常工作,你的工具需要遵守一些规则: +为了使推送到 Hub 正常工作,你的工具需要遵守一些规则: - 所有方法都是自包含的,例如使用来自其参数中的变量。 -- 根据上述要点,**所有导入应直接在工具的函数中定义**,否则在尝试使用[`~Tool.save`]或[`~Tool.push_to_hub`]调用你的自定义工具时会出现错误。 -- 如果你继承了`__init__`方法,除了`self`之外,你不能给它任何其他参数。这是因为在特定工具实例初始化期间设置的参数很难跟踪,这阻碍了将它们正确分享到Hub。无论如何,创建特定类的想法是你已经可以为任何需要硬编码的内容设置类属性(只需在`class YourTool(Tool):`行下直接设置`your_variable=(...)`)。当然,你仍然可以通过将内容分配给`self.your_variable`在代码中的任何地方创建类属性。 +- 根据上述要点,**所有导入应直接在工具的函数中定义**,否则在尝试使用 [`~Tool.save`] 或 [`~Tool.push_to_hub`] 调用你的自定义工具时会出现错误。 +- 如果你继承了 `__init__` 方法,除了 `self` 之外,你不能给它任何其他参数。这是因为在特定工具实例初始化期间设置的参数很难跟踪,这阻碍了将它们正确分享到 Hub。无论如何,创建特定类的想法是你已经可以为任何需要硬编码的内容设置类属性(只需在 `class YourTool(Tool):` 行下直接设置 `your_variable=(...)`)。当然,你仍然可以通过将内容分配给 `self.your_variable` 在代码中的任何地方创建类属性。 -一旦你的工具被推送到Hub,你就可以查看它。[这里](https://huggingface.co/spaces/m-ric/hf-model-downloads)是我推送的`model_downloads_tool`。它有一个漂亮的gradio界面。 +一旦你的工具被推送到 Hub,你就可以查看它。[这里](https://huggingface.co/spaces/m-ric/hf-model-downloads) 是我推送的 `model_downloads_tool`。它有一个漂亮的 gradio 界面。 -在深入工具文件时,你可以发现所有工具的逻辑都在[tool.py](https://huggingface.co/spaces/m-ric/hf-model-downloads/blob/main/tool.py)下。这是你可以检查其他人分享的工具的地方。 +在深入工具文件时,你可以发现所有工具的逻辑都在 [tool.py](https://huggingface.co/spaces/m-ric/hf-model-downloads/blob/main/tool.py) 下。这是你可以检查其他人分享的工具的地方。
-然后你可以使用[`load_tool`]加载工具或使用[`~Tool.from_hub`]创建它,并将其传递给agent中的`tools`参数。 -由于运行工具意味着运行自定义代码,你需要确保你信任该仓库,因此我们需要传递`trust_remote_code=True`来从Hub加载工具。 +然后你可以使用 [`load_tool`] 加载工具或使用 [`~Tool.from_hub`] 创建它,并将其传递给 agent 中的 `tools` 参数。 +由于运行工具意味着运行自定义代码,你需要确保你信任该仓库,因此我们需要传递 `trust_remote_code=True` 来从 Hub 加载工具。 ```python from smolagents import load_tool, CodeAgent @@ -109,13 +109,13 @@ model_download_tool = load_tool( ) ``` -### 将Space导入为工具 +### 将 Space 导入为工具 -你可以使用[`Tool.from_space`]方法直接从Hub导入一个Space作为工具! +你可以使用 [`Tool.from_space`] 方法直接从 Hub 导入一个 Space 作为工具! -你只需要提供Hub上Space的id、它的名称和一个帮助你的agent理解工具功能的描述。在底层,这将使用[`gradio-client`](https://pypi.org/project/gradio-client/)库来调用Space。 +你只需要提供 Hub 上 Space 的 id、它的名称和一个帮助你的 agent 理解工具功能的描述。在底层,这将使用 [`gradio-client`](https://pypi.org/project/gradio-client/) 库来调用 Space。 -例如,让我们从Hub导入[FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev) Space并使用它生成一张图片。 +例如,让我们从 Hub 导入 [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev) Space 并使用它生成一张图片。 ```python image_generation_tool = Tool.from_space( @@ -130,7 +130,7 @@ image_generation_tool("A sunny beach") <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/sunny_beach.webp"> -然后你可以像使用任何其他工具一样使用这个工具。例如,让我们改进提示`A rabbit wearing a space suit`并生成它的图片。 +然后你可以像使用任何其他工具一样使用这个工具。例如,让我们改进提示 `A rabbit wearing a space suit` 并生成它的图片。 ```python from smolagents import CodeAgent, HfApiModel @@ -157,13 +157,13 @@ final_answer(image) 这得有多酷?🤩 -### 使用LangChain工具 +### 使用 LangChain 工具 -我们喜欢Langchain,并认为它有一套非常吸引人的工具。 -要从LangChain导入工具,请使用`from_langchain()`方法。 +我们喜欢 LangChain,并认为它有一套非常吸引人的工具。 +要从 LangChain 导入工具,请使用 `from_langchain()` 方法。 -以下是如何使用它来重现介绍中的搜索结果,使用LangChain的web搜索工具。 -这个工具需要`pip install langchain google-search-results -q`才能正常工作。 +以下是如何使用它来重现介绍中的搜索结果,使用 LangChain 的 web 搜索工具。 +这个工具需要 `pip install langchain google-search-results -q` 才能正常工作。 ```python from langchain.agents import load_tools @@ -174,11 +174,11 @@ agent =
CodeAgent(tools=[search_tool], model=model) agent.run("How many more blocks (also denoted as layers) are in BERT base encoder compared to the encoder from the architecture proposed in Attention is All You Need?") ``` -### 管理你的agent工具箱 +### 管理你的 agent 工具箱 -你可以通过添加或替换工具来管理agent的工具箱。 +你可以通过添加或替换工具来管理 agent 的工具箱。 -让我们将`model_download_tool`添加到一个仅使用默认工具箱初始化的现有agent中。 +让我们将 `model_download_tool` 添加到一个仅使用默认工具箱初始化的现有 agent 中。 ```python from smolagents import HfApiModel @@ -198,13 +198,13 @@ agent.run( > [!TIP] -> 注意不要向agent添加太多工具:这可能会让较弱的LLM引擎不堪重负。 +> 注意不要向 agent 添加太多工具:这可能会让较弱的 LLM 引擎不堪重负。 ### 使用工具集合 -你可以通过使用ToolCollection对象来利用工具集合,使用你想要使用的集合的slug。 -然后将它们作为列表传递给agent初始化,并开始使用它们! +你可以通过使用 ToolCollection 对象来利用工具集合,使用你想要使用的集合的 slug。 +然后将它们作为列表传递给 agent 初始化,并开始使用它们! ```py from smolagents import ToolCollection, CodeAgent @@ -218,4 +218,4 @@ agent = CodeAgent(tools=[*image_tool_collection.tools], model=model, add_base_to agent.run("Please draw me a picture of rivers and lakes.") ``` -为了加快启动速度,工具仅在agent调用时加载。 +为了加快启动速度,工具仅在 agent 调用时加载。