-
Notifications
You must be signed in to change notification settings - Fork 396
/
Copy pathpipeline_settings.yaml
81 lines (71 loc) · 1.92 KB
/
pipeline_settings.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# This YAML file is a configuration template for GraphRAG indexing jobs.
# Some values are hardcoded; others are set dynamically.
# Input documents: text files selected by file_pattern, read from blob storage.
input:
  type: blob
  file_type: text
  # Regular expression; single-quoted so no character is read as YAML syntax.
  file_pattern: '.*\.txt$'
  storage_account_blob_url: '$STORAGE_ACCOUNT_BLOB_URL'
  # PLACEHOLDER is presumably replaced at runtime -- confirm in the loader.
  container_name: PLACEHOLDER
  base_dir: '.'
# Output storage: blob container, rooted at the "output" directory.
storage:
  type: blob
  storage_account_blob_url: '$STORAGE_ACCOUNT_BLOB_URL'
  # PLACEHOLDER is presumably replaced at runtime -- confirm in the loader.
  container_name: PLACEHOLDER
  base_dir: output
# Reporting output: blob container, rooted at the "logs" directory.
reporting:
  type: blob
  storage_account_blob_url: '$STORAGE_ACCOUNT_BLOB_URL'
  # PLACEHOLDER is presumably replaced at runtime -- confirm in the loader.
  container_name: PLACEHOLDER
  base_dir: logs
# Cache storage: blob container, rooted at the "cache" directory.
cache:
  type: blob
  storage_account_blob_url: '$STORAGE_ACCOUNT_BLOB_URL'
  # PLACEHOLDER is presumably replaced at runtime -- confirm in the loader.
  container_name: PLACEHOLDER
  base_dir: cache
# Chat model settings (type: azure_openai_chat).
# Endpoint, API version, model and deployment come from $-prefixed references.
llm:
  type: azure_openai_chat
  api_base: '$GRAPHRAG_API_BASE'
  api_version: '$GRAPHRAG_API_VERSION'
  model: '$GRAPHRAG_LLM_MODEL'
  deployment_name: '$GRAPHRAG_LLM_DEPLOYMENT_NAME'
  cognitive_services_endpoint: '$GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT'
  # Canonical lowercase boolean (was "True").
  model_supports_json: true
  # Throughput settings; exact semantics are defined by the consumer.
  tokens_per_minute: 80000
  requests_per_minute: 480
  thread_count: 50
  concurrent_requests: 25
# Top-level parallelization settings.
parallelization:
  # Unit of stagger is not stated in this file -- confirm with the consumer.
  stagger: 0.25
  num_threads: 10

# Top-level async mode; the embeddings section sets its own async_mode as well.
async_mode: threaded
# Embedding settings: model connection, parallelization and vector store.
embeddings:
  async_mode: threaded
  # Embedding model (type: azure_openai_embedding).
  llm:
    type: azure_openai_embedding
    api_base: '$GRAPHRAG_API_BASE'
    api_version: '$GRAPHRAG_API_VERSION'
    batch_size: 16
    model: '$GRAPHRAG_EMBEDDING_MODEL'
    deployment_name: '$GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME'
    cognitive_services_endpoint: '$GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT'
    # Throughput settings; exact semantics are defined by the consumer.
    tokens_per_minute: 350000
    concurrent_requests: 25
    requests_per_minute: 2100
    thread_count: 50
    max_retries: 50
  # Same values as the top-level parallelization section.
  parallelization:
    stagger: 0.25
    num_threads: 10
  # Vector store of type azure_ai_search.
  vector_store:
    type: azure_ai_search
    # PLACEHOLDER is presumably replaced at runtime -- confirm in the loader.
    collection_name: PLACEHOLDER
    title_column: name
    # Canonical lowercase boolean (was "True").
    overwrite: true
    url: '$AI_SEARCH_URL'
# Entity extraction settings.
# Both values are PLACEHOLDER here; presumably filled in at runtime -- confirm
# in the code that loads this template.
entity_extraction:
  prompt: PLACEHOLDER
  entity_types: PLACEHOLDER
# Snapshot settings: the graphml snapshot is enabled.
snapshots:
  # Canonical lowercase boolean (was "True").
  graphml: true