# sample-config-file-3.yml
# Forked from AmadeusITGroup/docs2vecs.
# Reusable building blocks. Each skill mapping is anchored (&Name) so that the
# Pipeline skillset at the end of this list can reference it by alias (*Name).
definitions:
  # Exporter skill configured against the Scroll HTML Exporter public API URL.
  - skill: &ScrollHTMLExporter
      type: exporter
      name: scrollhtml-exporter
      params:
        api_url: https://scroll-html.de.exporter.k15t.app/api/public/1/exports
        # NOTE(review): presumably resolved from the SCROLL_HTML_EXPORTER_TOKEN
        # environment variable by the consumer - confirm. Never put the token
        # itself in this file.
        auth_token: env.SCROLL_HTML_EXPORTER_TOKEN
        # NOTE(review): unit is not stated here (presumably seconds) - confirm.
        poll_interval: 2
        export_folder: ~/Downloads/html_export
        scope: current
        confluence_prefix: https://amadeus.atlassian.net/wiki
        page_ids:
          - id: 1436680207

  # Transformer skill: reads from input_dir and writes to output_dir.
  # NOTE(review): both paths embed the page id 1436680207 listed under
  # page_ids above - presumably they must be changed together; confirm.
  - skill: &HtmlToMarkdown
      type: transformer
      name: confluence-html-to-markdown
      params:
        input_dir: ~/Downloads/html_export/1436680207
        output_dir: ~/Downloads/html_export/1436680207/markdown

  # File scanner pointed at the same directory as HtmlToMarkdown's output_dir,
  # restricted to *.md files, non-recursive.
  - skill: &FileScanner
      type: file-scanner
      name: multi-file-scanner
      params:
        path: ~/Downloads/html_export/1436680207/markdown
        # Quoted on purpose: an unquoted leading * would be read as a YAML alias.
        filter: ["*.md"]
        recursive: false

  # File reader; no params are set here.
  - skill: &FileReader
      type: file-reader
      name: multi-file-reader

  # Text splitter.
  # NOTE(review): chunk_size/overlap are presumably counted in characters
  # (the splitter is named recursive-character-splitter) - confirm.
  - skill: &TextSplitter
      type: splitter
      name: recursive-character-splitter
      params:
        chunk_size: 1200
        overlap: 200

  # Embedding skill; no params are set here.
  - skill: &FastEmbed
      type: embedding
      name: llama-fastembed

  # Vector store skill backed by a ChromaDB path and collection name.
  - skill: &ChromaDbVectorStore
      type: vector-store
      name: chromadb
      params:
        db_path: ~/Downloads/html_export/chroma_db
        collection_name: confluence-html-export

  # Ordered list of the skills above; referenced as *Pipeline by
  # indexer.skillset.
  - skillset: &Pipeline
      - *ScrollHTMLExporter
      - *HtmlToMarkdown
      - *FileScanner
      - *FileReader
      - *TextSplitter
      - *FastEmbed
      - *ChromaDbVectorStore
# Indexer definition: an id plus the skillset it uses, given as an alias to
# the sequence anchored as &Pipeline under definitions.
indexer:
  id: confluence-html-to-vectorstore
  skillset: *Pipeline