# example_config.yml (vcon-dev/vcon-server, main branch)
---
# =============================================================================
# vCon Server Configuration
# =============================================================================
#
# WORKER CONFIGURATION (via environment variables):
#
# CONSERVER_WORKERS: Number of worker processes (default: 1)
#   - Set to 1 for single-worker mode (original behavior)
#   - Set to 2+ for multi-worker mode (parallel vCon processing)
#   - Each worker independently consumes from Redis queues
#   - Redis BLPOP provides atomic work distribution
#   Example: CONSERVER_WORKERS=4
#
# CONSERVER_PARALLEL_STORAGE: Enable parallel storage writes (default: true)
#   - When true, writes to multiple storage backends happen concurrently
#   - Improves throughput when using multiple storage backends (e.g., S3 + MongoDB)
#   - Set to "false" to use sequential storage writes
#   Example: CONSERVER_PARALLEL_STORAGE=true
#
# CONSERVER_START_METHOD: Multiprocessing start method (default: platform default)
#   - "fork": Copy-on-write memory sharing, fastest startup (Unix only)
#     - Lower memory usage due to copy-on-write
#     - Can cause issues with threads and some libraries (OpenSSL, CUDA)
#   - "spawn": Fresh Python interpreter per worker
#     - Higher memory but safer and more predictable
#     - Required on Windows, recommended for macOS
#   - "forkserver": Hybrid approach using a clean forked server process
#   - "" or unset: Use Python's platform default
#   Example: CONSERVER_START_METHOD=spawn
#
# =============================================================================

# Ingress-specific API key authentication
# This section defines API keys that are authorized to push vCons to specific ingress lists
# Each key grants access only to the specified ingress list, providing secure isolation
# Multiple API keys can be configured per ingress list for different clients/systems
ingress_auth:
  # String form: one key guards the 'customer_data' ingress list.
  # (All key values in this section are example placeholders.)
  customer_data: 'customer-api-key-12345'

  # List form: several clients each hold their own key for 'support_calls'.
  support_calls:
    - 'support-api-key-67890'
    - 'support-client-2-key'
    - 'support-vendor-key-xyz'

  # List form again, this time for the 'sales_leads' ingress list.
  sales_leads:
    - 'sales-api-key-abcdef'
    - 'sales-partner-key-123'

  # The single-string form is still accepted (backward compatible).
  analytics_data: 'analytics-api-key-789'

# Optional imports section for dynamically installed modules
imports:
  # PyPI package: `module` is the module name, `pip_name` the package to install
  custom_utility:
    module: custom_utils
    pip_name: 'custom-utils-package'
  # Package installed directly from a GitHub repository
  github_helper:
    module: github_helper
    pip_name: 'git+https://github.com/username/helper-repo.git'
  # pip_name is left out here because it is identical to the module name
  requests_import:
    module: requests
  # Legacy shorthand (still supported): the value is just a module path string
  legacy_module: some.legacy.module

# Link definitions: each entry names the module implementing the link and,
# optionally, its ingress/egress lists and an `options` mapping.
links:
  transcribe:
    module: links.transcribe
    ingress-lists: []
    egress-lists: []
    options:
      transcribe_options:
        model_size: base
        output_options:
          - vendor
  script:
    module: links.script
    ingress-lists: []
    egress-lists: []
  summary:
    module: links.summary
    ingress-lists: []
    egress-lists: []
  tag:
    module: links.tag
    ingress-lists: []
    egress-lists: []
    options:
      tags:
        - Geddy
        - Alex
        - The Professor
  webhook:
    module: links.webhook
    options:
      # NOTE(review): placeholder host with no URL scheme - confirm whether the
      # webhook link needs a full URL (e.g. https://...) before real use.
      webhook-urls:
        - notreal.com
  # Link backed by a PyPI package (package name given by pip_name)
  custom_pypi_link:
    module: custom_link_module
    pip_name: custom-link-package
    options:
      api_key: your_api_key_here
  # Link backed by a package installed from a GitHub repository
  custom_github_link:
    module: github_link_module
    pip_name: git+https://github.com/username/repo.git
    options:
      config_param: value
  # Link whose pip package name equals its module name, so pip_name is omitted
  requests_link:
    module: requests
    options:
      timeout: 30

# No tracers are enabled by default. The value is an explicit empty mapping
# because a bare `tracers:` key (with only comments beneath it) loads as null
# rather than as an empty mapping.
#
# To add JLINC auditing, replace `{}` with the commented mapping below and
# also list `jlinc` under `tracers` in each chain that should use it.
tracers: {}
  # jlinc:
  #   module: tracers.jlinc
  #   options:
  #     data_store_api_url: http://jlinc-server:9090
  #     data_store_api_key: <acquire from UI>
  #     archive_api_url: http://jlinc-server:9090
  #     archive_api_key: <acquire from UI>
  #     system_prefix: VCONTest
  #     agreement_id: 00000000-0000-0000-0000-000000000000
  #     hash_event_data: true
  #     dlq_vcon_on_error: true

# Storage backends. Each entry names the implementing module and its options.
# Chains refer to these entries by key in their `storages` list.
# All credentials below are example placeholders; replace them before use.
storages:
  file:
    module: storage.file
    options:
      path: /data/vcons
      organize_by_date: true
      compression: false
      max_file_size: 10485760  # 10 * 1024 * 1024
      # Mode bits are written as decimal integers so that every YAML loader
      # yields an int. YAML 1.1 loaders such as PyYAML read `0o644` as the
      # string "0o644", and YAML 1.2 loaders read a bare `0644` as decimal 644.
      # NOTE(review): assumes the file storage module expects integer mode
      # bits - confirm against storage.file.
      file_permissions: 420  # 0o644 (rw-r--r--)
      dir_permissions: 493  # 0o755 (rwxr-xr-x)
  mongo:
    module: storage.mongo
    options:
      url: mongodb://root:example@mongo:27017/
      database: conserver
      collection: vcons
  postgres:
    module: storage.postgres
    options:
      user: thomashowe
      password: postgres
      host: localhost
      port: '5432'
      database: postgres
  s3:
    module: storage.s3
    options:
      aws_access_key_id: some_key
      aws_secret_access_key: some_secret
      aws_bucket: some_bucket
      aws_region: us-east-1  # AWS region where the bucket is located
      # endpoint_url: null  # Optional: custom endpoint for S3-compatible services
      # s3_path: vcons      # Optional: prefix for S3 keys
  milvus:
    module: storage.milvus
    options:
      # Connection settings
      host: "localhost"
      port: "19530"
      collection_name: "vcons"

      # Embedding settings
      embedding_model: "text-embedding-3-small"
      embedding_dim: 1536
      api_key: "your-openai-api-key"
      organization: ""

      # Operation settings
      create_collection_if_missing: true
      skip_if_exists: true

      # Vector index settings (Default: IVF_FLAT with L2 distance)
      index_type: "IVF_FLAT"  # Options: IVF_FLAT, IVF_SQ8, IVF_PQ, HNSW, FLAT
      metric_type: "L2"       # Options: L2, IP, COSINE
      nlist: 128              # For IVF indexes: number of clusters

      # Advanced HNSW settings (used only if index_type is HNSW)
      # m: 16                 # Number of edges per node
      # ef_construction: 200  # Size of dynamic candidate list during construction

      # Advanced IVF_PQ settings (used only if index_type is IVF_PQ)
      # pq_m: 8               # Number of sub-quantizers
      # pq_nbits: 8           # Bit depth per quantizer
# Chains: each entry lists links, ingress lists, storages and egress lists by
# name. Link and storage names refer to keys under `links` and `storages`.
chains:
  sample_chain:
    links:
      - transcribe
      - script
      - summary
      - tag
    ingress_lists:
      - test_list
    storages:
      - mongo
      - postgres
      - s3
      - milvus
    # Uncomment to attach the jlinc tracer (it must also be defined in `tracers`):
    # tracers:
    #   - jlinc
    egress_lists:
      - test_output
    enabled: 1
  # Chain built from the custom, dynamically installed links
  custom_chain:
    links:
      - custom_pypi_link
      - custom_github_link
      - requests_link
    ingress_lists:
      - custom_test_list
    storages:
      - mongo
    # Uncomment to attach the jlinc tracer (it must also be defined in `tracers`):
    # tracers:
    #   - jlinc
    egress_lists:
      - custom_output
    enabled: 1