diff --git a/notebooks/en/agent_data_analyst.ipynb b/notebooks/en/agent_data_analyst.ipynb
index 51ea24f7..94009260 100644
--- a/notebooks/en/agent_data_analyst.ipynb
+++ b/notebooks/en/agent_data_analyst.ipynb
@@ -42,17 +42,17 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "from smolagents import HfApiModel, CodeAgent\n",
+ "from smolagents import InferenceClientModel, CodeAgent\n",
"from huggingface_hub import login\n",
"import os\n",
"\n",
"login(os.getenv(\"HUGGINGFACEHUB_API_TOKEN\"))\n",
"\n",
- "model = HfApiModel(\"meta-llama/Llama-3.1-70B-Instruct\")\n",
+ "model = InferenceClientModel(\"meta-llama/Llama-3.1-70B-Instruct\")\n",
"\n",
"agent = CodeAgent(\n",
" tools=[],\n",
diff --git a/notebooks/en/agent_rag.ipynb b/notebooks/en/agent_rag.ipynb
index 5b481939..a442f157 100644
--- a/notebooks/en/agent_rag.ipynb
+++ b/notebooks/en/agent_rag.ipynb
@@ -219,7 +219,7 @@
"- *`tools`*: a list of tools that the agent will be able to call.\n",
"- *`model`*: the LLM that powers the agent.\n",
"\n",
- "Our `model` must be a callable that takes as input a list of [messages](https://huggingface.co/docs/transformers/main/chat_templating) and returns text. It also needs to accept a `stop_sequences` argument that indicates when to stop its generation. For convenience, we directly use the `HfApiModel` class provided in the package to get a LLM engine that calls our [Inference API](https://huggingface.co/docs/api-inference/en/index).\n",
+ "Our `model` must be a callable that takes as input a list of [messages](https://huggingface.co/docs/transformers/main/chat_templating) and returns text. It also needs to accept a `stop_sequences` argument that indicates when to stop its generation. For convenience, we directly use the `InferenceClientModel` class provided in the package to get an LLM engine that calls our [Inference API](https://huggingface.co/docs/api-inference/en/index).\n",
"\n",
"And we use [meta-llama/Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct), served for free on Hugging Face's Inference API!\n",
"\n",
@@ -232,9 +232,9 @@
"metadata": {},
"outputs": [],
"source": [
- "from smolagents import HfApiModel, ToolCallingAgent\n",
+ "from smolagents import InferenceClientModel, ToolCallingAgent\n",
"\n",
- "model = HfApiModel(\"meta-llama/Llama-3.1-70B-Instruct\")\n",
+ "model = InferenceClientModel(\"meta-llama/Llama-3.1-70B-Instruct\")\n",
"\n",
"retriever_tool = RetrieverTool(vectordb)\n",
"agent = ToolCallingAgent(\n",
@@ -263,7 +263,7 @@
"│ │\n",
"│ How can I push a model to the Hub? │\n",
"│ │\n",
- "╰─ HfApiModel - meta-llama/Llama-3.1-70B-Instruct ────────────────────────────────────────────────────────────────╯\n",
+ "╰─ InferenceClientModel - meta-llama/Llama-3.1-70B-Instruct ────────────────────────────────────────────────────────────────╯\n",
"\n"
],
"text/plain": [
@@ -271,7 +271,7 @@
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1mHow can I push a model to the Hub?\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
- "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m HfApiModel - meta-llama/Llama-3.1-70B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
+ "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m InferenceClientModel - meta-llama/Llama-3.1-70B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
]
},
"metadata": {},
diff --git a/notebooks/en/agent_text_to_sql.ipynb b/notebooks/en/agent_text_to_sql.ipynb
index 32e50f59..efb53fd2 100644
--- a/notebooks/en/agent_text_to_sql.ipynb
+++ b/notebooks/en/agent_text_to_sql.ipynb
@@ -160,7 +160,7 @@
"\n",
"We use the `CodeAgent`, which is `transformers.agents`' main agent class: an agent that writes actions in code and can iterate on previous output according to the ReAct framework.\n",
"\n",
- "The `llm_engine` is the LLM that powers the agent system. `HfApiModel` allows you to call LLMs using Hugging Face's Inference API, either via Serverless or Dedicated endpoint, but you could also use any proprietary API: check out [this other cookbook](agent_change_llm) to learn how to adapt it."
+ "The `model` argument is the LLM that powers the agent system. `InferenceClientModel` allows you to call LLMs using Hugging Face's Inference API, either via a Serverless or a Dedicated endpoint, but you could also use any proprietary API: check out [this other cookbook](agent_change_llm) to learn how to adapt it."
]
},
{
@@ -169,11 +169,11 @@
"metadata": {},
"outputs": [],
"source": [
- "from smolagents import CodeAgent, HfApiModel\n",
+ "from smolagents import CodeAgent, InferenceClientModel\n",
"\n",
"agent = CodeAgent(\n",
" tools=[sql_engine],\n",
- " model=HfApiModel(\"meta-llama/Meta-Llama-3-8B-Instruct\"),\n",
+ " model=InferenceClientModel(\"meta-llama/Meta-Llama-3-8B-Instruct\"),\n",
")"
]
},
@@ -189,7 +189,7 @@
"│ │\n",
"│ Can you give me the name of the client who got the most expensive receipt? │\n",
"│ │\n",
- "╰─ HfApiModel - meta-llama/Meta-Llama-3-8B-Instruct ──────────────────────────────────────────────────────────────╯\n",
+ "╰─ InferenceClientModel - meta-llama/Meta-Llama-3-8B-Instruct ──────────────────────────────────────────────────────────────╯\n",
"\n"
],
"text/plain": [
@@ -197,7 +197,7 @@
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1mCan you give me the name of the client who got the most expensive receipt?\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
- "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m HfApiModel - meta-llama/Meta-Llama-3-8B-Instruct \u001b[0m\u001b[38;2;212;183;2m─────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
+ "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m InferenceClientModel - meta-llama/Meta-Llama-3-8B-Instruct \u001b[0m\u001b[38;2;212;183;2m─────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
]
},
"metadata": {},
@@ -396,7 +396,7 @@
"│ │\n",
"│ Which waiter got more total money from tips? │\n",
"│ │\n",
- "╰─ HfApiModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
+ "╰─ InferenceClientModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
"\n"
],
"text/plain": [
@@ -404,7 +404,7 @@
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1mWhich waiter got more total money from tips?\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
- "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m HfApiModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
+ "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m InferenceClientModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
]
},
"metadata": {},
@@ -740,7 +740,7 @@
"\n",
"agent = CodeAgent(\n",
" tools=[sql_engine],\n",
- " model=HfApiModel(\"Qwen/Qwen2.5-72B-Instruct\"),\n",
+ " model=InferenceClientModel(\"Qwen/Qwen2.5-72B-Instruct\"),\n",
")\n",
"\n",
"agent.run(\"Which waiter got more total money from tips?\")"
diff --git a/notebooks/en/agents.ipynb b/notebooks/en/agents.ipynb
index 82616038..049d079b 100644
--- a/notebooks/en/agents.ipynb
+++ b/notebooks/en/agents.ipynb
@@ -95,7 +95,7 @@
"│ │\n",
"│ Generate me a photo of the car that James bond drove in the latest movie. │\n",
"│ │\n",
- "╰─ HfApiModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
+ "╰─ InferenceClientModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
"\n"
],
"text/plain": [
@@ -103,7 +103,7 @@
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1mGenerate me a photo of the car that James bond drove in the latest movie.\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
- "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m HfApiModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
+ "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m InferenceClientModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
]
},
"metadata": {},
@@ -549,7 +549,7 @@
}
],
"source": [
- "from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool\n",
+ "from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool\n",
"\n",
"# Import tool from Hub\n",
"image_generation_tool = load_tool(\"m-ric/text-to-image\", trust_remote_code=True)\n",
@@ -557,7 +557,7 @@
"\n",
"search_tool = DuckDuckGoSearchTool()\n",
"\n",
- "model = HfApiModel(\"Qwen/Qwen2.5-72B-Instruct\")\n",
+ "model = InferenceClientModel(\"Qwen/Qwen2.5-72B-Instruct\")\n",
"# Initialize the agent with both tools\n",
"agent = CodeAgent(\n",
" tools=[image_generation_tool, search_tool], model=model\n",
@@ -796,9 +796,9 @@
"metadata": {},
"outputs": [],
"source": [
- "from smolagents import HfApiModel, ToolCallingAgent\n",
+ "from smolagents import InferenceClientModel, ToolCallingAgent\n",
"\n",
- "model = HfApiModel(\"Qwen/Qwen2.5-72B-Instruct\")\n",
+ "model = InferenceClientModel(\"Qwen/Qwen2.5-72B-Instruct\")\n",
"\n",
"retriever_tool = RetrieverTool(vectordb=vectordb, all_sources=all_sources)\n",
"agent = ToolCallingAgent(tools=[retriever_tool], model=model, verbose=0)\n",
@@ -840,7 +840,7 @@
"│ python code: │\n",
"│ {'code': '\\nnumbers=[0, 1, 2]\\n\\nfor i in range(4):\\n print(numbers(i))\\n'}. │\n",
"│ │\n",
- "╰─ HfApiModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
+ "╰─ InferenceClientModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
"\n"
],
"text/plain": [
@@ -852,7 +852,7 @@
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1mpython code:\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1m{'code': '\\nnumbers=[0, 1, 2]\\n\\nfor i in range(4):\\n print(numbers(i))\\n'}.\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
- "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m HfApiModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
+ "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m InferenceClientModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
]
},
"metadata": {},
@@ -1086,7 +1086,7 @@
"source": [
"from smolagents import CodeAgent\n",
"\n",
- "agent = CodeAgent(tools=[], model=HfApiModel(\"Qwen/Qwen2.5-72B-Instruct\"))\n",
+ "agent = CodeAgent(tools=[], model=InferenceClientModel(\"Qwen/Qwen2.5-72B-Instruct\"))\n",
"\n",
"code = \"\"\"\n",
"numbers=[0, 1, 2]\n",
diff --git a/notebooks/en/multiagent_rag_system.ipynb b/notebooks/en/multiagent_rag_system.ipynb
index f348aeee..60b41058 100644
--- a/notebooks/en/multiagent_rag_system.ipynb
+++ b/notebooks/en/multiagent_rag_system.ipynb
@@ -151,10 +151,10 @@
},
"outputs": [],
"source": [
- "from smolagents import HfApiModel\n",
+ "from smolagents import InferenceClientModel\n",
"\n",
"model_id = \"Qwen/Qwen2.5-72B-Instruct\"\n",
- "model = HfApiModel(model_id)"
+ "model = InferenceClientModel(model_id)"
]
},
{
diff --git a/notebooks/en/multiagent_web_assistant.ipynb b/notebooks/en/multiagent_web_assistant.ipynb
index 79796577..08acef03 100644
--- a/notebooks/en/multiagent_web_assistant.ipynb
+++ b/notebooks/en/multiagent_web_assistant.ipynb
@@ -199,12 +199,12 @@
"from smolagents import (\n",
" CodeAgent,\n",
" ToolCallingAgent,\n",
- " HfApiModel,\n",
+ " InferenceClientModel,\n",
" ManagedAgent,\n",
" DuckDuckGoSearchTool\n",
")\n",
"\n",
- "model = HfApiModel(model_id)\n",
+ "model = InferenceClientModel(model_id)\n",
"\n",
"web_agent = ToolCallingAgent(\n",
" tools=[DuckDuckGoSearchTool(), visit_webpage],\n",
@@ -277,7 +277,7 @@
"│ │\n",
"│ How many years ago was Stripe founded? │\n",
"│ │\n",
- "╰─ HfApiModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
+ "╰─ InferenceClientModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
"\n"
],
"text/plain": [
@@ -285,7 +285,7 @@
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1mHow many years ago was Stripe founded?\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
- "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m HfApiModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
+ "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m InferenceClientModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
]
},
"metadata": {},
@@ -348,7 +348,7 @@
"│ manager can act upon this feedback. │\n",
"│ {additional_prompting} │\n",
"│ │\n",
- "╰─ HfApiModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
+ "╰─ InferenceClientModel - Qwen/Qwen2.5-72B-Instruct ────────────────────────────────────────────────────────────────────────╯\n",
"\n"
],
"text/plain": [
@@ -374,7 +374,7 @@
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1mmanager can act upon this feedback.\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[1m{additional_prompting}\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
"\u001b[38;2;212;183;2m│\u001b[0m \u001b[38;2;212;183;2m│\u001b[0m\n",
- "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m HfApiModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
+ "\u001b[38;2;212;183;2m╰─\u001b[0m\u001b[38;2;212;183;2m InferenceClientModel - Qwen/Qwen2.5-72B-Instruct \u001b[0m\u001b[38;2;212;183;2m───────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;212;183;2m─╯\u001b[0m\n"
]
},
"metadata": {},
diff --git a/notebooks/zh-CN/agents.ipynb b/notebooks/zh-CN/agents.ipynb
index d9975ccf..e7b2d469 100644
--- a/notebooks/zh-CN/agents.ipynb
+++ b/notebooks/zh-CN/agents.ipynb
@@ -164,18 +164,18 @@
},
{
"ename": "ImportError",
- "evalue": "cannot import name 'HfApiModel' from 'transformers' (/Users/aymeric/.pyenv/versions/3.12.0/lib/python3.12/site-packages/transformers/__init__.py)",
+ "evalue": "cannot import name 'InferenceClientModel' from 'transformers' (/Users/aymeric/.pyenv/versions/3.12.0/lib/python3.12/site-packages/transformers/__init__.py)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tool, load_tool, CodeAgent, HfApiModel\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Import tool from Hub\u001b[39;00m\n\u001b[1;32m 4\u001b[0m image_generation_tool \u001b[38;5;241m=\u001b[39m load_tool(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mm-ric/text-to-image\u001b[39m\u001b[38;5;124m\"\u001b[39m, trust_remote_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
- "\u001b[0;31mImportError\u001b[0m: cannot import name 'HfApiModel' from 'transformers' (/Users/aymeric/.pyenv/versions/3.12.0/lib/python3.12/site-packages/transformers/__init__.py)"
+ "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tool, load_tool, CodeAgent, InferenceClientModel\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Import tool from Hub\u001b[39;00m\n\u001b[1;32m 4\u001b[0m image_generation_tool \u001b[38;5;241m=\u001b[39m load_tool(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mm-ric/text-to-image\u001b[39m\u001b[38;5;124m\"\u001b[39m, trust_remote_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
+ "\u001b[0;31mImportError\u001b[0m: cannot import name 'InferenceClientModel' from 'transformers' (/Users/aymeric/.pyenv/versions/3.12.0/lib/python3.12/site-packages/transformers/__init__.py)"
]
}
],
"source": [
- "from transformers import Tool, load_tool, CodeAgent, HfApiModel\n",
+ "from transformers import Tool, load_tool, CodeAgent, InferenceClientModel\n",
"\n",
"# Import tool from Hub\n",
"image_generation_tool = load_tool(\"m-ric/text-to-image\", trust_remote_code=True)\n",
@@ -186,7 +186,7 @@
"search_tool = Tool.from_langchain(load_tools([\"serpapi\"])[0])\n",
"\n",
"\n",
- "model = HfApiModel(\"meta-llama/Llama-3.1-70B-Instruct\")\n",
+ "model = InferenceClientModel(\"meta-llama/Llama-3.1-70B-Instruct\")\n",
"# Initialize the agent with both tools\n",
"agent = CodeAgent(\n",
" tools=[image_generation_tool, search_tool], model=model\n",