-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconfig.yaml
More file actions
51 lines (46 loc) · 1.68 KB
/
config.yaml
File metadata and controls
51 lines (46 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Embedding encoder settings.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped — confirm against the code that reads this file.
encoder:
  model_path: "sentence-transformers/all-MiniLM-l6-v2"
  cache_embeddings: true
  # If true, embeddings are cached in cache_dir; if false, they are cached
  # in memory.
  persistent_cache: true
  cache_dir: ".cache"
  model_kwargs:
    device: "cpu"  # "cuda" for NVIDIA GPU or "cpu"
  encode_kwargs:
    normalize_embeddings: false
# Retriever settings.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped. chunk_size/chunk_overlap are placed under `splitting`;
# num_docs and cache_dir are assumed to sit at the retriever level — confirm
# against the code that reads this file.
retriever:
  splitting:
    chunk_size: 256
    chunk_overlap: 64
  num_docs: 8
  cache_dir: ".cache"
# Generator (LLM) settings.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; every key below is assumed to be a direct child of
# `generator` — confirm against the code that reads this file.
generator:
  llm_path: "models/tinyllama-1.1b-chat-v1.0.Q8_0.gguf"
  context_length: 2048
  temperature: 0.8
  max_tokens: 256
  # Number of layers to offload to the GPU (if available); null for all
  # layers.
  gpu_layers: 0
  model_kwargs: {}
  search_kwargs: {}
  similarity_threshold: 0.75
  # Not in use yet. Eventually the context could be compressed to remove
  # redundant information.
  compress: false
  llama: false  # true if the model is a llama model
  # Automatically clear the model stdout and stderr buffers; set to false to
  # control flushing manually.
  autoflush: true
# Dataset loading settings.
# NOTE(review): the indentation of this section was lost in this copy. As
# written, every key below parses as a top-level key and `dataset`,
# `loader_kwargs` and `unstructured_kwargs` parse as null. It cannot be
# determined from here whether `unstructured_kwargs` belongs under
# `loader_kwargs` or directly under `dataset`, nor which of the keys after it
# are its children — restore the nesting from the loader code before use.
dataset:
path: "./data"
recursive: True
show_progress: True # show a progress bar while loading the dataset
use_multithreading: True
loader_kwargs:
mode: "single" # either "single" or "elements"
unstructured_kwargs:
include_metadata: True
languages: ["en"] # either "auto" or a list of known languages
chunking_strategy: "by_title" # "by_title" or null (no chunking)
# Prompt template settings.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped — confirm against the code that reads this file.
prompt:
  user_name: "User"
  AI_name: "Assistant"
  instruct_user: "Instruct"  # usually "System", or "Instruct"
  # The trailing space in `instructions` is kept as-is; presumably the text
  # is joined directly with `pre_context` — verify against the prompt builder.
  instructions: "You are a friendly, helpful AI assistant. Succinctly and truthfully answer the user's question "
  pre_context: "using the following documents"
  post_context: ""
  user_query: "{question}"