-
-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathvectorizers.yaml
More file actions
42 lines (40 loc) · 1.55 KB
/
vectorizers.yaml
File metadata and controls
42 lines (40 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
vectorizers:
  # 3. Obsidian / Markdown notes
  # Uses the "markdown" chunking strategy (splits on ## headers)
  - name: "personal_notes"
    kektor_index: "knowledge_base"  # target index name in the vector store
    schedule: "30s"  # re-scan interval for the source directory
    source:
      type: "filesystem"
      path: "/home/dash/Documenti/nvim comandi"
      include_patterns: ["*.md", "*.txt", "*.pdf", "*.docx"]
      exclude_patterns: ["draft_*"] # Ignore draft files
    index_config:
      metric: "cosine"
      # NOTE(review): original comment claimed float32 "saves 75% RAM" —
      # that figure matches quantized precisions (e.g. int8), not float32;
      # verify against the index engine's docs before relying on it.
      precision: "float32"
      m: 16                 # HNSW connectivity — higher = better graph quality, more memory
      ef_construction: 200  # Slower index build but more accurate neighbor links
      text_language: "italian" # Italian stemming for hybrid (keyword + vector) search
    embedder:
      type: "ollama_api"
      url: "http://localhost:11434/api/embeddings"
      model: "nomic-embed-text-v2-moe:latest"
    document_processor:
      chunking_strategy: "markdown" # Respects the Markdown document structure
      chunk_size: 1000  # max characters/tokens per chunk — TODO confirm unit with the consumer
      metadata_template:
        # {{file_path}} is expanded by the vectorizer at index time
        source_file: "{{file_path}}"
        category: "study_notes"
    graph_enabled: true             # build a knowledge graph alongside the vector index
    graph_entity_extraction: true   # use the LLM below to extract entities for the graph
    llm:
      base_url: "http://localhost:11434/v1"  # OpenAI-compatible Ollama endpoint
      model: "gemma3:4b" # Or llama3, mistral, gpt-4o-mini...
      temperature: 0.0 # Deterministic output is better for data extraction
    vision_llm:
      base_url: "http://localhost:11434/v1"
      # IMPORTANT: this model must already be pulled in Ollama!
      # Suggested: llama3.2-vision (small and capable) or llava
      model: "gemma3:4b"
      temperature: 0.0
      max_tokens: 300 # Cap image descriptions so they don't bloat the embedding