-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Description
Recommended microservice-mode Loki configuration for a daily data increase of 5 TB
The current configuration of values.yaml is as follows:
`loki:
  # NOTE(review): this block was pasted without indentation. The nesting
  # below is a best-effort reconstruction from the key names; confirm it
  # against the real values.yaml before relying on it.
  schemaConfig:
    configs:
      # Quoted so the start date stays a string instead of being parsed
      # as a YAML timestamp.
      - from: "2024-04-01"
        store: tsdb
        object_store: s3
        schema: v13
        index:
          prefix: loki_index_
          period: 24h
  storage_config:
    aws:
      region: ap-singapore
      bucketnames: loki-data-1251220924
      s3forcepathstyle: false
      http_config:
        response_header_timeout: 5s
  query_frontend:
    max_outstanding_per_tenant: 1000
  # NOTE(review): the flattened paste does not show whether query_sharding
  # and cache_results are children of query_frontend or siblings of it.
  # They are placed as siblings here; confirm.
  query_sharding:
    enabled: true
  cache_results: true
  ingester:
    chunk_encoding: snappy
  pattern_ingester:
    enabled: true
  # NOTE(review): assumed to be a sibling of pattern_ingester; confirm.
  http_listen_config:
    max_recv_msg_size: 83886080  # 80 * 1024 * 1024
  server:
    http_server_read_timeout: 600s
    http_server_write_timeout: 600s
    grpc_server_max_recv_msg_size: 3145728  # 3 * 1024 * 1024
    grpc_server_max_send_msg_size: 3145728  # 3 * 1024 * 1024
  limits_config:
    # Ingestion limits.
    ingestion_burst_size_mb: 1024
    ingestion_rate_mb: 512
    per_stream_rate_limit: 30MB
    per_stream_rate_limit_burst: 60MB
    max_line_size: 5242880  # 5 * 1024 * 1024
    # Query limits.
    query_timeout: 600s
    max_entries_limit_per_query: 20000
    max_queriers_per_tenant: 40
    tsdb_max_query_parallelism: 512
    split_queries_by_interval: 30m
    max_query_parallelism: 2048
    max_query_series: 100000000
    # Ruler limits.
    ruler_remote_evaluation_timeout: 2m
    ruler_remote_evaluation_max_response_size: 104857600  # 100 * 1024 * 1024
    allow_structured_metadata: true
    volume_enabled: true
    # 336h = 14 days; the same value is used for retention and max sample age.
    retention_period: 336h
    reject_old_samples: true
    reject_old_samples_max_age: 336h
  chunk_store_config:
    chunk_cache_config:
      embedded_cache:
        enabled: true
        max_size_mb: 256
    # NOTE(review): assumed to belong to chunk_store_config rather than to
    # embedded_cache; confirm.
    max_look_back_period: 336h
  table_manager:
    retention_period: 336h
  querier:
    max_concurrent: 8
  storage:
    type: s3
    # All three bucket roles point at the same bucket.
    bucketNames:
      chunks: loki-data-1251220924
      ruler: loki-data-1251220924
      admin: loki-data-1251220924
    s3:
      # NOTE(review): this looks like an unedited placeholder URL; confirm
      # whether it should be set at all alongside endpoint/region below.
      s3: s3://access_key:secret_access_key@custom_endpoint/bucket_name
      endpoint: cos.ap-singapore.myqcloud.com
      region: ap-singapore
      # Empty in the posted config (presumably redacted); written as an
      # explicit null, which is what a bare `key:` already parses to.
      secretAccessKey: null
      accessKeyId: null
      signatureVersion: v4
      s3ForcePathStyle: false
      insecure: false
      http_config: {}
# Chart-level settings: deployment mode and the bundled MinIO toggle.
deploymentMode: Distributed
minio:
  enabled: false
# NOTE(review): the paste lost its indentation, so the level of this key is
# a guess. A chunk_store_config key also appears earlier in the paste;
# confirm whether this second one is intentional and where it belongs.
chunk_store_config:
  chunk_cache_config:
    enable_fifocache: true
# Ingester component: replica count, feature toggles and pod resources.
ingester:
  replicas: 2
  zoneAwareReplication:
    enabled: false
  # NOTE(review): the paste lost its indentation; `wal` is assumed to sit
  # directly under `ingester` — confirm.
  wal:
    enabled: false
  resources:
    limits:
      cpu: 4096m
      memory: 8192Mi
    requests:
      cpu: 1024m
      memory: 1024Mi
# Querier component: replica count, disruption budget and pod resources.
querier:
  replicas: 2
  maxUnavailable: 1
  resources:
    limits:
      cpu: 3
      memory: 6Gi
    requests:
      cpu: 1
      memory: 1Gi
# Query-frontend component: replica count, disruption budget and pod
# resources (requests equal limits).
queryFrontend:
  replicas: 2
  maxUnavailable: 1
  resources:
    limits:
      cpu: 1024m
      memory: 2048Mi
    requests:
      cpu: 1024m
      memory: 2048Mi
# Query-scheduler component: replica count and pod resources
# (requests equal limits).
queryScheduler:
  replicas: 2
  resources:
    limits:
      cpu: 1024m
      memory: 512Mi
    requests:
      cpu: 1024m
      memory: 512Mi
# Distributor component: replica count, disruption budget and pod
# resources (requests equal limits).
distributor:
  replicas: 4
  maxUnavailable: 2
  resources:
    limits:
      cpu: 2048m
      memory: 512Mi
    requests:
      cpu: 2048m
      memory: 512Mi
# Compactor component: a single replica and its pod resources.
compactor:
  replicas: 1
  resources:
    limits:
      cpu: 2
      memory: 10Gi
    requests:
      cpu: 512m
      memory: 1Gi
# Index-gateway component: replica count, disruption budget, pod resources
# and scheduling tolerations.
indexGateway:
  replicas: 4
  maxUnavailable: 1
  resources:
    limits:
      cpu: 512m
      memory: 2048Mi
    requests:
      cpu: 512m
      memory: 1024Mi
  # NOTE(review): the paste lost its indentation; `tolerations` is assumed
  # to belong to `indexGateway` because it directly follows it — confirm.
  tolerations:
    - key: "app"
      operator: "Equal"
      value: "infra"
      effect: "NoSchedule"
# Gateway component: pod resources only; no replica count is set here.
gateway:
  resources:
    limits:
      cpu: 512m
      memory: 2048Mi
    requests:
      cpu: 512m
      memory: 1024Mi
# Bloom components are all set to zero replicas.
bloomPlanner:
  replicas: 0
bloomBuilder:
  replicas: 0
bloomGateway:
  replicas: 0
# backend, read, write and singleBinary are likewise set to zero replicas.
backend:
  replicas: 0
read:
  replicas: 0
write:
  replicas: 0
singleBinary:
  replicas: 0`
However, queries over a time range of one day or longer return very slowly and can even crash after running for more than ten minutes. Which areas can be optimized — for example, configuration parameters or pod resources?