-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Expand file tree
/
Copy pathapp.yaml
More file actions
119 lines (97 loc) · 4.23 KB
/
Copy pathapp.yaml
File metadata and controls
119 lines (97 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# This YAML configuration file is used to set up and configure the Private RAG template.
# It defines various components such as data sources, language models, embedders, splitters, parsers, and retrievers.
# Each section is configured to specify how the template should process and handle data for generating responses.
# You can learn more about the YAML syntax here: https://pathway.com/developers/templates/configure-yaml
# $sources defines the data sources whose contents are read and indexed by the RAG.
# It is a list: every entry is one connector, and several connectors can be enabled at once.
# You can learn more about how to configure data sources here:
# https://pathway.com/developers/templates/yaml-examples/data-sources-examples
$sources:
  # File System connector, reading data locally.
  # The keys below are arguments of this connector, so they must stay nested under the list entry.
  - !pw.io.fs.read
    path: data
    format: binary
    with_metadata: true

  # Uncomment to use the SharePoint connector
  # - !pw.xpacks.connectors.sharepoint.read
  #   url: $SHAREPOINT_URL
  #   tenant: $SHAREPOINT_TENANT
  #   client_id: $SHAREPOINT_CLIENT_ID
  #   cert_path: sharepointcert.pem
  #   thumbprint: $SHAREPOINT_THUMBPRINT
  #   root_path: $SHAREPOINT_ROOT
  #   with_metadata: true
  #   refresh_interval: 30

  # Uncomment to use the Google Drive connector
  # - !pw.io.gdrive.read
  #   object_id: $DRIVE_ID
  #   service_user_credentials_file: gdrive_indexer.json
  #   file_name_pattern:
  #     - "*.pdf"
  #     - "*.pptx"
  #   object_size_limit: null
  #   with_metadata: true
  #   refresh_interval: 30
# Configures the LLM model settings for generating responses.
# The list of available Pathway LLM wrappers is available here:
# https://pathway.com/developers/api-docs/pathway-xpacks-llm/llms
# You can learn more about those in our documentation:
# https://pathway.com/developers/templates/rag-customization/llm-chats

# Model identifier, kept in its own variable so it can be changed in one place.
$llm_model: "ollama/mistral"

# Chat wrapper; every key nested below is an argument passed to LiteLLMChat.
$llm: !pw.xpacks.llm.llms.LiteLLMChat
  model: $llm_model
  # max_retries is an argument of the retry strategy, not of the chat itself.
  retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy
    max_retries: 6
  cache_strategy: !pw.udfs.DefaultCache {}
  temperature: 0
  api_base: "http://localhost:11434"
  # api_base: "http://host.docker.internal:11434"  # use this instead when running the app in Docker on Mac or Windows
  async_mode: "fully_async"
# Name of the embedding model, referenced by the embedder below.
$embedding_model: "avsolatorio/GIST-small-Embedding-v0"

# Specifies the embedder model for converting text into embeddings.
$embedder: !pw.xpacks.llm.embedders.SentenceTransformerEmbedder
  model: $embedding_model
  # Extra keyword arguments; presumably forwarded to each embedding call
  # (confirm in the SentenceTransformerEmbedder docs).
  call_kwargs:
    show_progress_bar: false
# Sets up the splitter for chunking the documents.
# max_tokens is an argument of TokenCountSplitter and must be nested under it.
$splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
  max_tokens: 400
# Configures the parser for processing and extracting information from documents.
# All keys nested below are arguments of DoclingParser.
$parser: !pw.xpacks.llm.parsers.DoclingParser
  table_parsing_strategy: "llm"
  async_mode: "fully_async"
  # NOTE(review): chunking appears to be left to $splitter rather than the parser — confirm.
  chunk: false
  cache_strategy: !pw.udfs.DefaultCache {}
# Sets up the retriever factory for indexing and retrieving documents.
# All keys nested below are arguments of UsearchKnnFactory.
$retriever_factory: !pw.indexing.UsearchKnnFactory
  reserved_space: 1000
  # Reuses the $embedder defined earlier in this file.
  embedder: $embedder
  # NOTE(review): COS presumably selects cosine distance — confirm in the USearchMetricKind docs.
  metric: !pw.indexing.USearchMetricKind.COS
# Manages the storage and retrieval of documents for the RAG template.
# Wires together the sources, parser, splitter and retriever factory defined earlier in this file.
$document_store: !pw.xpacks.llm.document_store.DocumentStore
  docs: $sources
  parser: $parser
  splitter: $splitter
  retriever_factory: $retriever_factory
# Configures the question-answering component using the RAG approach.
# Unlike the `$`-prefixed variables in this file, this key has no `$` prefix.
# All keys nested below are arguments of AdaptiveRAGQuestionAnswerer.
question_answerer: !pw.xpacks.llm.question_answering.AdaptiveRAGQuestionAnswerer
  llm: $llm
  indexer: $document_store
  # See the AdaptiveRAGQuestionAnswerer API docs for the exact meaning of the tuning values below.
  n_starting_documents: 2
  factor: 2
  max_iterations: 4
  strict_prompt: true
# Change host and port of the webserver by uncommenting these lines
# host: "0.0.0.0"
# port: 8000
# By default, caching is enabled for UDFs with cache_strategy set.
# You can disable it by uncommenting the following line.
# persistence_mode: null
# You can also set persistence_mode to !pw.PersistenceMode.PERSISTING to enable persistence
# across restarts.
# By default, when enabled, the cache is stored in the `.Cache` directory.
# You can customize the location by uncommenting and modifying the following lines:
# persistence_backend: !pw.persistence.Backend.filesystem
#   path: ".Cache"
# If `terminate_on_error` is true, the program will terminate whenever any error is encountered.
# It defaults to false; uncomment the following line if you want to set it to true.
# terminate_on_error: true