Skip to content

Commit 36b3713

Browse files
feat: Notion toolkit integration (#1111)
Co-authored-by: Wendong-Fan <[email protected]> Co-authored-by: Wendong <[email protected]>
1 parent 6a47dcf commit 36b3713

File tree

7 files changed

+563
-13
lines changed

7 files changed

+563
-13
lines changed

camel/toolkits/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
)
2121
from .open_api_specs.security_config import openapi_security_config
2222

23-
2423
from .math_toolkit import MathToolkit, MATH_FUNCS
2524
from .search_toolkit import SearchToolkit, SEARCH_FUNCS
2625
from .weather_toolkit import WeatherToolkit, WEATHER_FUNCS
@@ -39,6 +38,7 @@
3938
from .twitter_toolkit import TwitterToolkit, TWITTER_FUNCS
4039
from .open_api_toolkit import OpenAPIToolkit
4140
from .retrieval_toolkit import RetrievalToolkit
41+
from .notion_toolkit import NotionToolkit
4242

4343
__all__ = [
4444
'BaseToolkit',
@@ -63,6 +63,7 @@
6363
'AskNewsToolkit',
6464
'AsyncAskNewsToolkit',
6565
'GoogleScholarToolkit',
66+
'NotionToolkit',
6667
'ArxivToolkit',
6768
'MATH_FUNCS',
6869
'SEARCH_FUNCS',

camel/toolkits/notion_toolkit.py

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2+
# Licensed under the Apache License, Version 2.0 (the “License”);
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an “AS IS” BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14+
import os
15+
from typing import List, Optional, cast
16+
17+
from camel.toolkits import FunctionTool
18+
from camel.toolkits.base import BaseToolkit
19+
20+
21+
def get_plain_text_from_rich_text(rich_text: List[dict]) -> str:
    r"""Concatenates the plain text carried by a list of rich text elements.

    Args:
        rich_text (List[dict]): Rich text elements. Each element is looked
            up under the "plain_text" key; an element without that key
            contributes an empty string.

    Returns:
        str: The plain text of every element, joined in order without any
            separator.
    """
    return "".join(fragment.get("plain_text", "") for fragment in rich_text)
35+
36+
37+
def get_media_source_text(block: dict) -> str:
    r"""Builds a short text description of a Notion media block.

    Args:
        block (dict): A dictionary representing a Notion media block. Its
            "type" entry names the key under which the media payload is
            stored.

    Returns:
        str: The source URL of the media. When the payload carries a
            non-empty caption, the result is formatted as
            "<caption>: <source>". If no URL can be found, a placeholder
            naming the block type is used as the source.
    """
    kind = block.get("type", "Unknown Type")
    payload = block.get(kind, {})

    # Try the known URL locations in order; the first truthy one wins.
    url = payload.get("external", {}).get("url")
    if not url:
        url = payload.get("file", {}).get("url")
    if not url:
        url = payload.get(
            "url", "[Missing case for media block types]: " + kind
        )

    caption_parts = payload.get("caption", [])
    if not caption_parts:
        return url

    return f"{get_plain_text_from_rich_text(caption_parts)}: {url}"
67+
68+
69+
class NotionToolkit(BaseToolkit):
    r"""A toolkit for retrieving information from the user's notion pages.

    Attributes:
        notion_token (Optional[str]): The token used to authenticate
            against the notion APIs. Falls back to the ``NOTION_TOKEN``
            environment variable when not passed explicitly.
        notion_client (Client): The ``notion_client.Client`` instance used
            for every call to the notion APIs.
    """

    def __init__(
        self,
        notion_token: Optional[str] = None,
    ) -> None:
        r"""Initializes the NotionToolkit.

        Args:
            notion_token (Optional[str], optional): The optional notion_token
                used to interact with notion APIs. When omitted, the
                ``NOTION_TOKEN`` environment variable is used.
                (default: :obj:`None`)
        """
        # Imported lazily so the package can be imported without the
        # optional `notion_client` dependency installed.
        from notion_client import Client

        self.notion_token = notion_token or os.environ.get("NOTION_TOKEN")
        self.notion_client = Client(auth=self.notion_token)

    def list_all_users(self) -> List[dict]:
        r"""Lists all users via the Notion integration.

        Returns:
            List[dict]: A list of user objects with type, name, and workspace.
                "type" and "name" are :obj:`None` when Notion does not
                return them; "workspace" is an empty string when the user
                has no workspace name.
        """
        all_users_info: List[dict] = []
        cursor = None

        # Follow the cursor until Notion reports there are no more pages.
        while True:
            response = cast(
                dict,
                self.notion_client.users.list(start_cursor=cursor),
            )
            all_users_info.extend(response["results"])

            if not response["has_more"]:
                break

            cursor = response["next_cursor"]

        formatted_users = []
        for user in all_users_info:
            # "type" and "name" are optional on user objects, so avoid
            # hard indexing that would abort the whole listing.
            user_type = user.get("type")
            # The type-specific payload (e.g. user["bot"]) is where the
            # workspace name lives, when present.
            type_details = user.get(user_type) or {}
            formatted_users.append(
                {
                    "type": user_type,
                    "name": user.get("name"),
                    "workspace": type_details.get("workspace_name", ""),
                }
            )

        return formatted_users

    def list_all_pages(self) -> List[dict]:
        r"""Lists all pages in the Notion workspace.

        Returns:
            List[dict]: A list of page objects with title and id. The title
                is :obj:`None` when the page has no text title.
        """
        all_pages_info: List[dict] = []
        cursor = None

        # Follow the cursor until Notion reports there are no more pages.
        while True:
            response = cast(
                dict,
                self.notion_client.search(
                    filter={"property": "object", "value": "page"},
                    start_cursor=cursor,
                ),
            )
            all_pages_info.extend(response["results"])

            if not response["has_more"]:
                break

            cursor = response["next_cursor"]

        formatted_pages = [
            {
                "id": page.get("id"),
                "title": self._get_page_title(page),
            }
            for page in all_pages_info
        ]

        return formatted_pages

    def _get_page_title(self, page: dict) -> Optional[str]:
        r"""Returns the content of the first text fragment of a page title.

        Args:
            page (dict): A dictionary representing a Notion page.

        Returns:
            Optional[str]: The title text, or :obj:`None` if the page has no
                title property or the title holds no text fragment.
        """
        properties = page.get("properties") or {}

        # Pages that live in a database expose their title under the name
        # of the database's title column (e.g. "Name"), so locate the
        # property by its type and only then fall back to the "title" key.
        title_property = next(
            (
                prop
                for prop in properties.values()
                if isinstance(prop, dict) and prop.get("type") == "title"
            ),
            None,
        )
        if title_property is None:
            title_property = properties.get("title") or {}

        return next(
            (
                fragment.get("text", {}).get("content")
                for fragment in title_property.get("title", [])
                if fragment.get("type") == "text"
            ),
            None,
        )

    def get_notion_block_text_content(self, block_id: str) -> str:
        r"""Retrieves the text content of a Notion block.

        Args:
            block_id (str): The ID of the Notion block to retrieve.

        Returns:
            str: The text content of a Notion block, containing all
                the sub blocks. The text of the direct children is joined
                with single spaces.
        """
        blocks: List[dict] = []
        cursor = None

        # Follow the cursor until Notion reports there are no more pages.
        while True:
            response = cast(
                dict,
                self.notion_client.blocks.children.list(
                    block_id=block_id, start_cursor=cursor
                ),
            )
            blocks.extend(response["results"])

            if not response["has_more"]:
                break

            cursor = response["next_cursor"]

        block_text_content = " ".join(
            [self.get_text_from_block(sub_block) for sub_block in blocks]
        )

        return block_text_content

    def get_text_from_block(self, block: dict) -> str:
        r"""Extracts plain text from a Notion block based on its type.

        Args:
            block (dict): A dictionary representing a Notion block.

        Returns:
            str: A string containing the extracted plain text. When the
                block has children, their text is appended after a single
                space.
        """
        block_type = block.get("type")
        block_content = block.get(block_type) or {}

        # Get rich text for supported block types. Test for the key rather
        # than its truthiness so an empty line yields an empty string
        # instead of falling through to the "[Needs case added]" branch.
        if "rich_text" in block_content:
            text = get_plain_text_from_rich_text(
                block_content["rich_text"] or []
            )
        # Handle the remaining block types by case
        elif block_type == "unsupported":
            text = "[Unsupported block type]"
        elif block_type == "bookmark":
            text = block["bookmark"]["url"]
        elif block_type == "child_database":
            # Use other API endpoints for full database data
            text = block["child_database"]["title"]
        elif block_type == "child_page":
            text = block["child_page"]["title"]
        elif block_type in ("embed", "video", "file", "image", "pdf"):
            text = get_media_source_text(block)
        elif block_type == "equation":
            text = block["equation"]["expression"]
        elif block_type == "link_preview":
            text = block["link_preview"]["url"]
        elif block_type == "synced_block":
            synced_from = block["synced_block"].get("synced_from")
            if synced_from:
                # Built on one logical line so no newline or indentation
                # leaks into the returned text.
                text = (
                    "This block is synced with a block with ID: "
                    f"{synced_from[synced_from['type']]}"
                )
            else:
                text = "Source sync block that another block is synced with."
        elif block_type == "table":
            # Fetch children for full table data
            text = f"Table width: {block['table']['table_width']}"
        elif block_type == "table_of_contents":
            text = f"ToC color: {block['table_of_contents']['color']}"
        elif block_type in ("breadcrumb", "column_list", "divider"):
            text = "No text available"
        else:
            text = "[Needs case added]"

        # Query children for blocks with children
        if block.get("has_children"):
            children_text = self.get_notion_block_text_content(block["id"])
            # Separate parent and children text so words do not run
            # together; skip empty parts to avoid stray spaces.
            text = " ".join(part for part in (text, children_text) if part)

        return text

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects representing the
        functions in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects
                representing the functions in the toolkit.
        """
        return [
            FunctionTool(self.list_all_pages),
            FunctionTool(self.list_all_users),
            FunctionTool(self.get_notion_block_text_content),
        ]

examples/tool_call/google_scholar_toolkit.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
tools = GoogleScholarToolkit(
2929
author_identifier="https://scholar.google.com/citations?user=JicYPdAAAAAJ&hl=en&oi=ao"
3030
).get_tools()
31+
3132
model_config_dict = ChatGPTConfig(
3233
temperature=0.0,
3334
).as_dict()

0 commit comments

Comments
 (0)