You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: src/talkpipe/app/chatterlang_workbench.py
+2 -2 Lines changed: 2 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -94,7 +94,7 @@ def emit(self, record):
94
94
{
95
95
"name": "RAG Pipeline with Vector Database",
96
96
"description": "Build a complete RAG system with document indexing and querying",
97
-
"code": '# This example demonstrates a complete RAG (Retrieval-Augmented Generation) workflow.\n# It indexes documents into a vector database and then queries them with an LLM.\n\n# Sample knowledge base documents (in a real scenario, these would be from files or a database)\nCONST docs = "TalkPipe is a Python toolkit for building AI workflows. It provides a Unix-like pipeline syntax for chaining data transformations and LLM operations.|TalkPipe supports multiple LLM providers including OpenAI, Ollama, and Anthropic. You can switch between providers easily using configuration.|With TalkPipe, you can build RAG systems, multi-agent debates, and document processing pipelines. It uses Python generators for memory-efficient streaming.";\n\n# Step 1: Index documents into a vector database\nINPUT FROM echo[data=docs, delimiter="|"] \n | toDict[field_list="_:text"] \n | makeVectorDatabase[\n path="./demo_knowledge_base",\n embedding_model="nomic-embed-text",\n embedding_source="ollama",\n embedding_field="text"\n ] \n | print;\n\n# Step 2: Query the knowledge base with RAG\nINPUT FROM echo[data="What are the key benefits of using TalkPipe?"] \n | toDict[field_list="_:text"] \n | ragToText[\n path="./demo_knowledge_base",\n embedding_model="nomic-embed-text",\n embedding_source="ollama",\n completion_model="llama3.2",\n completion_source="ollama",\n content_field="text",\n prompt_directive="Answer the question based on the background information provided.",\n limit=3\n ] \n | print'
97
+
"code": '# This example demonstrates a complete RAG (Retrieval-Augmented Generation) workflow.\n# It indexes documents into a vector database and then queries them with an LLM.\n\n# Sample knowledge base documents (in a real scenario, these would be from files or a database)\nCONST docs = "TalkPipe is a Python toolkit for building AI workflows. It provides a Unix-like pipeline syntax for chaining data transformations and LLM operations.|TalkPipe supports multiple LLM providers including OpenAI, Ollama, and Anthropic. You can switch between providers easily using configuration.|With TalkPipe, you can build RAG systems, multi-agent debates, and document processing pipelines. It uses Python generators for memory-efficient streaming.";\n\n# Step 1: Index documents into a vector database\nINPUT FROM echo[data=docs, delimiter="|"] \n | toDict[field_list="_:text"] \n | makeVectorDatabase[\n path="tmp://demo_knowledge_base",\n embedding_model="nomic-embed-text",\n embedding_source="ollama",\n embedding_field="text"\n ] \n | print;\n\n# Step 2: Query the knowledge base with RAG\nINPUT FROM echo[data="What are the key benefits of using TalkPipe?"] \n | toDict[field_list="_:text"] \n | ragToText[\n path="tmp://demo_knowledge_base",\n embedding_model="nomic-embed-text",\n embedding_source="ollama",\n completion_model="llama3.2",\n completion_source="ollama",\n content_field="text",\n prompt_directive="Answer the question based on the background information provided.",\n limit=3\n ] \n | print'
""" Convenience segment that runs a RAG pipeline from search to prompt creation to LLM completion.
64
+
65
+
Path supports multiple URI schemes:
66
+
- File path: "./my_db" or "/path/to/db" - Persistent storage
67
+
- Memory: "memory://" - Ephemeral in-memory database (faster, no disk I/O)
68
+
- Temp: "tmp://name" - Process-scoped temporary database (shared by name, auto-cleanup on exit)
64
69
"""
65
-
70
+
66
71
def__init__(self,
67
72
embedding_model: Annotated[str, "Embedding model to use"],
68
73
embedding_source: Annotated[str, "Source of text to embed"],
69
74
completion_model: Annotated[str, "LLM model to use for completion"],
70
75
completion_source: Annotated[str, "Source of prompt for completion"],
71
-
path: Annotated[str, "Path to the LanceDB database"],
76
+
path: Annotated[str, "Path to LanceDB database. Supports file paths, 'memory://' for in-memory, or 'tmp://name' for process-scoped temp (auto-cleanup)"],
72
77
content_field: Annotated[Any, "Field to evaluate relevance on"],
73
78
prompt_directive: Annotated[str, "Directive to guide the evaluation"] ="Respond to the provided content based on the background information. If the background does not contain relevant information, respond with 'No relevant information found.'",
74
79
set_as: Annotated[str, "The field to set/append the result as."] =None,
""" RAG pipeline that outputs text completions from LLM.
121
+
122
+
Path supports multiple URI schemes:
123
+
- File path: "./my_db" or "/path/to/db" - Persistent storage
124
+
- Memory: "memory://" - Ephemeral in-memory database (faster, no disk I/O)
125
+
- Temp: "tmp://name" - Process-scoped temporary database (shared by name, auto-cleanup on exit)
116
126
"""
117
127
118
128
def__init__(self,
119
129
embedding_model: Annotated[str, "Embedding model to use"],
120
130
embedding_source: Annotated[str, "Source of text to embed"],
121
131
completion_model: Annotated[str, "LLM model to use for completion"],
122
132
completion_source: Annotated[str, "Source of prompt for completion"],
123
-
path: Annotated[str, "Path to the LanceDB database"],
133
+
path: Annotated[str, "Path to LanceDB database. Supports file paths, 'memory://' for in-memory, or 'tmp://name' for process-scoped temp (auto-cleanup)"],
124
134
content_field: Annotated[Any, "Field to evaluate relevance on"],
125
135
prompt_directive: Annotated[str, "Directive to guide the evaluation"] ="Respond to the provided content based on the background information. If the background does not contain relevant information, respond with 'No relevant information found.'",
126
136
set_as: Annotated[str, "The field to set/append the result as."] =None,
""" RAG pipeline that outputs binary answers from LLM.
159
+
160
+
Path supports multiple URI schemes:
161
+
- File path: "./my_db" or "/path/to/db" - Persistent storage
162
+
- Memory: "memory://" - Ephemeral in-memory database (faster, no disk I/O)
163
+
- Temp: "tmp://name" - Process-scoped temporary database (shared by name, auto-cleanup on exit)
149
164
"""
150
165
151
166
def__init__(self,
152
167
embedding_model: Annotated[str, "Embedding model to use"],
153
168
embedding_source: Annotated[str, "Source of text to embed"],
154
169
completion_model: Annotated[str, "LLM model to use for completion"],
155
170
completion_source: Annotated[str, "Source of prompt for completion"],
156
-
path: Annotated[str, "Path to the LanceDB database"],
171
+
path: Annotated[str, "Path to LanceDB database. Supports file paths, 'memory://' for in-memory, or 'tmp://name' for process-scoped temp (auto-cleanup)"],
157
172
content_field: Annotated[Any, "Field to evaluate relevance on"],
158
173
prompt_directive: Annotated[str, "Directive to guide the evaluation"] ="Answer the provided question as YES or NO. If the background does not contain relevant information, respond with 'NO'.",
159
174
set_as: Annotated[str, "The field to set/append the result as."] =None,
- File path: "./my_db" or "/path/to/db" - Persistent storage
201
+
- Memory: "memory://" - Ephemeral in-memory database (faster, no disk I/O)
202
+
- Temp: "tmp://name" - Process-scoped temporary database (shared by name, auto-cleanup on exit)
183
203
"""
184
204
185
205
def__init__(self,
186
206
embedding_model: Annotated[str, "Embedding model to use"],
187
207
embedding_source: Annotated[str, "Source of text to embed"],
188
208
completion_model: Annotated[str, "LLM model to use for completion"],
189
209
completion_source: Annotated[str, "Source of prompt for completion"],
190
-
path: Annotated[str, "Path to the LanceDB database"],
210
+
path: Annotated[str, "Path to LanceDB database. Supports file paths, 'memory://' for in-memory, or 'tmp://name' for process-scoped temp (auto-cleanup)"],
191
211
content_field: Annotated[Any, "Field to evaluate relevance on"],
192
212
prompt_directive: Annotated[str, "Directive to guide the evaluation"] ="Answer the provided question on a scale of 1 to 10. If the background does not contain relevant information, respond with a score of 1.",
193
213
set_as: Annotated[str, "The field to set/append the result as."] =None,
@@ -52,12 +57,17 @@ class SearchVectorDatabaseSegment(AbstractSegment):
52
57
search results are yielded (set_as must be None).
53
58
- If query_field is specified: Expects dictionary inputs, embeds the specified field,
54
59
and search results can be yielded directly (set_as=None) or attached to the input item.
60
+
61
+
Path supports multiple URI schemes:
62
+
- File path: "./my_db" or "/path/to/db" - Persistent storage
63
+
- Memory: "memory://" - Ephemeral in-memory database (faster, no disk I/O)
64
+
- Temp: "tmp://name" - Process-scoped temporary database (shared by name, auto-cleanup on exit)
55
65
"""
56
66
57
67
def__init__(self,
58
68
embedding_model: Annotated[str, "Embedding model to use"],
59
69
embedding_source: Annotated[str, "Source of text to embed"],
60
-
path: Annotated[str, "Path to the LanceDB database"],
70
+
path: Annotated[str, "Path to LanceDB database. Supports file paths, 'memory://' for in-memory, or 'tmp://name' for process-scoped temp (auto-cleanup)"],
61
71
table_name: Annotated[str, "Name of the table in the database"] ="docs",
62
72
query_field: Annotated[Optional[str], "Field containing the query text to embed. If None, expects string inputs."] =None,
63
73
limit: Annotated[int, "Number of search results to return"] =10,
defsearch_lancedb(items: Annotated[object, "Items with the query vectors"],
17
-
path: Annotated[str, "Path to the LanceDB database"],
59
+
path: Annotated[str, "Path to the LanceDB database. Supports file paths, 'memory://' for in-memory, or 'tmp://name' for process-scoped temp (auto-cleanup)"],
18
60
table_name: Annotated[str, "Table name in the LanceDB database"],
19
61
all_results_at_once: Annotated[bool, "If true, return all results at once"]=False,
20
62
field: Annotated[str, "Field with the vector"]=None,
@@ -25,6 +67,11 @@ def search_lancedb(items: Annotated[object, "Items with the query vectors"],
25
67
):
26
68
"""Search for similar vectors in LanceDB and return SearchResult objects.
27
69
70
+
The path parameter supports multiple URI schemes:
71
+
- File path: "./my_db" or "/path/to/db" - Persistent storage
72
+
- Memory: "memory://" - Ephemeral in-memory database (faster, no disk I/O)
73
+
- Temp: "tmp://name" - Process-scoped temporary database (shared by name, auto-cleanup on exit)
74
+
28
75
Yields:
29
76
SearchResult objects or lists of SearchResult objects.
30
77
"""
@@ -59,7 +106,7 @@ def search_lancedb(items: Annotated[object, "Items with the query vectors"],
59
106
@register_segment("addToLanceDB", "addToLancDB")
60
107
@segment()
61
108
defadd_to_lancedb(items: Annotated[object, "Items with the vectors and documents"],
62
-
path: Annotated[str, "Path to the LanceDB database"],
109
+
path: Annotated[str, "Path to the LanceDB database. Supports file paths, 'memory://' for in-memory, or 'tmp://name' for process-scoped temp (auto-cleanup)"],
63
110
table_name: Annotated[str, "Table name in the LanceDB database"],
64
111
vector_field: Annotated[str, "The field containing the vector data"] ="vector",
0 commit comments