@@ -5,14 +5,14 @@ metadata:
55 controller-tools.k8s.io : " 1.0"
66 name : kafka
77spec :
8+ kRaft : true
89 monitoringConfig :
910 jmxImage : " ghcr.io/adobe/koperator/jmx-javaagent:1.4.0"
1011 headlessServiceEnabled : true
1112 propagateLabels : false
1213 oneBrokerPerNode : false
1314 clusterImage : " ghcr.io/adobe/koperator/kafka:2.13-3.9.1"
1415 ingressController : " envoy"
15- kRaftMode : true
1616 readOnlyConfig : |
1717 auto.create.topics.enable=false
1818 cruise.control.metrics.topic.auto.create=true
@@ -28,24 +28,236 @@ spec:
2828 resources :
2929 requests :
3030 storage : 10Gi
31+ broker :
32+ processRoles :
33+ - broker
34+ storageConfigs :
35+ - mountPath : " /kafka-logs-broker"
36+ pvcSpec :
37+ accessModes :
38+ - ReadWriteOnce
39+ resources :
40+ requests :
41+ storage : 10Gi
3142 brokerAnnotations :
3243 prometheus.io/scrape : " true"
3344 prometheus.io/port : " 9020"
3445 brokers :
3546 - id : 0
47+ brokerConfigGroup : " broker"
48+ - id : 1
49+ brokerConfigGroup : " broker"
50+ - id : 2
51+ brokerConfigGroup : " broker"
52+ - id : 3
3653 brokerConfigGroup : " default"
3754 brokerConfig :
38- roles :
39- - " broker"
40- - " controller"
41- - id : 2
55+ processRoles :
56+ - controller
57+ - id : 4
58+ brokerConfigGroup : " default"
59+ brokerConfig :
60+ processRoles :
61+ - controller
62+ - id : 5
4263 brokerConfigGroup : " default"
4364 brokerConfig :
44- roles :
45- - " broker"
46- - " controller"
65+ processRoles :
66+ - controller
4767 rollingUpgradeConfig :
4868 failureThreshold : 1
69+ cruiseControlConfig :
70+ cruiseControlTaskSpec :
71+ RetryDurationMinutes : 5
72+ topicConfig :
73+ partitions : 12
74+ replicationFactor : 3
75+ config : |
76+ # Copyright 2017 LinkedIn Corp. Licensed under the BSD 2-Clause License (the "License"). See License in the project root for license information.
77+ #
78+ # This is an example property file for Kafka Cruise Control. See KafkaCruiseControlConfig for more details.
79+ # Configuration for the metadata client.
80+ # =======================================
81+ # The maximum interval in milliseconds between two metadata refreshes.
82+ #metadata.max.age.ms=300000
83+ # Client id for the Cruise Control. It is used for the metadata client.
84+ #client.id=kafka-cruise-control
85+ # The size of TCP send buffer bytes for the metadata client.
86+ #send.buffer.bytes=131072
87+ # The size of the TCP receive buffer in bytes for the metadata client.
88+ #receive.buffer.bytes=131072
89+ # The time to wait before disconnecting an idle TCP connection.
90+ #connections.max.idle.ms=540000
91+ # The time to wait before reconnecting to a given host.
92+ #reconnect.backoff.ms=50
93+ # The time to wait for a response from a host after sending a request.
94+ #request.timeout.ms=30000
95+ # Configurations for the load monitor
96+ # =======================================
97+ # The number of metric fetcher threads to fetch metrics for the Kafka cluster
98+ num.metric.fetchers=1
99+ # The metric sampler class
100+ metric.sampler.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.CruiseControlMetricsReporterSampler
101+ # Configurations for CruiseControlMetricsReporterSampler
102+ metric.reporter.topic.pattern=__CruiseControlMetrics
103+ # The sample store class name
104+ sample.store.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.KafkaSampleStore
105+ # The config for the Kafka sample store to save the partition metric samples
106+ partition.metric.sample.store.topic=__KafkaCruiseControlPartitionMetricSamples
107+ # The config for the Kafka sample store to save the model training samples
108+ broker.metric.sample.store.topic=__KafkaCruiseControlModelTrainingSamples
109+ # The replication factor of Kafka metric sample store topic
110+ sample.store.topic.replication.factor=2
111+ # The config for the number of Kafka sample store consumer threads
112+ num.sample.loading.threads=8
113+ # The partition assignor class for the metric samplers
114+ metric.sampler.partition.assignor.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.DefaultMetricSamplerPartitionAssignor
115+ # The metric sampling interval in milliseconds
116+ metric.sampling.interval.ms=120000
117+ metric.anomaly.detection.interval.ms=180000
118+ # The partition metrics window size in milliseconds
119+ partition.metrics.window.ms=300000
120+ # The number of partition metric windows to keep in memory
121+ num.partition.metrics.windows=1
122+ # The minimum partition metric samples required for a partition in each window
123+ min.samples.per.partition.metrics.window=1
124+ # The broker metrics window size in milliseconds
125+ broker.metrics.window.ms=300000
126+ # The number of broker metric windows to keep in memory
127+ num.broker.metrics.windows=20
128+ # The minimum broker metric samples required for a broker in each window
129+ min.samples.per.broker.metrics.window=1
130+ # The configuration for the BrokerCapacityConfigFileResolver (supports JBOD and non-JBOD broker capacities)
131+ capacity.config.file=config/capacity.json
132+ #capacity.config.file=config/capacityJBOD.json
133+ # Configurations for the analyzer
134+ # =======================================
135+ # The list of goals to optimize the Kafka cluster for with pre-computed proposals
136+ default.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.PotentialNwOutGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal
137+ # The list of supported goals
138+ goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.PotentialNwOutGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerDiskUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.PreferredLeaderElectionGoal
139+ # The list of supported hard goals
140+ hard.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal
141+ # The minimum percentage of well monitored partitions out of all the partitions
142+ min.monitored.partition.percentage=0.95
143+ # The balance threshold for CPU
144+ cpu.balance.threshold=1.1
145+ # The balance threshold for disk
146+ disk.balance.threshold=1.1
147+ # The balance threshold for network inbound utilization
148+ network.inbound.balance.threshold=1.1
149+ # The balance threshold for network outbound utilization
150+ network.outbound.balance.threshold=1.1
151+ # The balance threshold for the replica count
152+ replica.count.balance.threshold=1.1
153+ # The capacity threshold for CPU in percentage
154+ cpu.capacity.threshold=0.8
155+ # The capacity threshold for disk in percentage
156+ disk.capacity.threshold=0.8
157+ # The capacity threshold for network inbound utilization in percentage
158+ network.inbound.capacity.threshold=0.8
159+ # The capacity threshold for network outbound utilization in percentage
160+ network.outbound.capacity.threshold=0.8
161+ # The threshold to define the cluster to be in a low CPU utilization state
162+ cpu.low.utilization.threshold=0.0
163+ # The threshold to define the cluster to be in a low disk utilization state
164+ disk.low.utilization.threshold=0.0
165+ # The threshold to define the cluster to be in a low network inbound utilization state
166+ network.inbound.low.utilization.threshold=0.0
167+ # The threshold to define the cluster to be in a low network outbound utilization state
168+ network.outbound.low.utilization.threshold=0.0
169+ # The metric anomaly percentile upper threshold
170+ metric.anomaly.percentile.upper.threshold=90.0
171+ # The metric anomaly percentile lower threshold
172+ metric.anomaly.percentile.lower.threshold=10.0
173+ # How often should the cached proposal be expired and recalculated if necessary
174+ proposal.expiration.ms=60000
175+ # The maximum number of replicas that can reside on a broker at any given time.
176+ max.replicas.per.broker=10000
177+ # The number of threads to use for proposal candidate precomputing.
178+ num.proposal.precompute.threads=1
179+ # The topics that should be excluded from partition movement.
180+ #topics.excluded.from.partition.movement
181+ # Configurations for the executor
182+ # =======================================
183+ # The max number of partitions to move in/out on a given broker at a given time.
184+ num.concurrent.partition.movements.per.broker=10
185+ # The interval between two execution progress checks.
186+ execution.progress.check.interval.ms=10000
187+ # Configurations for anomaly detector
188+ # =======================================
189+ # The goal violation notifier class
190+ anomaly.notifier.class=com.linkedin.kafka.cruisecontrol.detector.notifier.SelfHealingNotifier
191+ # The metric anomaly finder class
192+ metric.anomaly.finder.class=com.linkedin.kafka.cruisecontrol.detector.KafkaMetricAnomalyFinder
193+ # The anomaly detection interval
194+ anomaly.detection.interval.ms=10000
195+ # The goal violation to detect.
196+ anomaly.detection.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal
197+ # The metrics of interest for the metric anomaly analyzer.
198+ metric.anomaly.analyzer.metrics=BROKER_PRODUCE_LOCAL_TIME_MS_MAX,BROKER_PRODUCE_LOCAL_TIME_MS_MEAN,BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MAX,BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MEAN,BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MAX,BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MEAN,BROKER_LOG_FLUSH_TIME_MS_MAX,BROKER_LOG_FLUSH_TIME_MS_MEAN
199+ ## Adjust accordingly if your metrics reporter is an older version and does not produce these metrics.
200+ #metric.anomaly.analyzer.metrics=BROKER_PRODUCE_LOCAL_TIME_MS_50TH,BROKER_PRODUCE_LOCAL_TIME_MS_999TH,BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_50TH,BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_999TH,BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_50TH,BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_999TH,BROKER_LOG_FLUSH_TIME_MS_50TH,BROKER_LOG_FLUSH_TIME_MS_999TH
201+ # The cluster configurations for the KafkaTopicConfigProvider
202+ cluster.configs.file=config/clusterConfigs.json
203+ # The maximum time in milliseconds to store the response and access details of a completed user task.
204+ completed.user.task.retention.time.ms=21600000
205+ # The maximum time in milliseconds to retain the demotion history of brokers.
206+ demotion.history.retention.time.ms=86400000
207+ # The maximum number of completed user tasks for which the response and access details will be cached.
208+ max.cached.completed.user.tasks=500
209+ # The maximum number of user tasks running concurrently in async endpoints across all users.
210+ max.active.user.tasks=25
211+ # Enable self healing for all anomaly detectors, unless the particular anomaly detector is explicitly disabled
212+ self.healing.enabled=true
213+ # Enable self healing for broker failure detector
214+ #self.healing.broker.failure.enabled=true
215+ # Enable self healing for goal violation detector
216+ #self.healing.goal.violation.enabled=true
217+ # Enable self healing for metric anomaly detector
218+ #self.healing.metric.anomaly.enabled=true
219+ # configurations for the webserver
220+ # ================================
221+ # HTTP listen port
222+ webserver.http.port=9090
223+ # HTTP listen address
224+ webserver.http.address=0.0.0.0
225+ # Whether CORS support is enabled for API or not
226+ webserver.http.cors.enabled=false
227+ # Value for Access-Control-Allow-Origin
228+ webserver.http.cors.origin=http://localhost:8080/
229+ # Value for Access-Control-Request-Method
230+ webserver.http.cors.allowmethods=OPTIONS,GET,POST
231+ # Headers that should be exposed to the Browser (Webapp)
232+ # This is a special header that is used by the
233+ # User Tasks subsystem and should be explicitly
234+ # Enabled when CORS mode is used as part of the
235+ # Admin Interface
236+ webserver.http.cors.exposeheaders=User-Task-ID
237+ # REST API default prefix
238+ # (don't forget the ending *)
239+ webserver.api.urlprefix=/kafkacruisecontrol/*
240+ # Location where the Cruise Control frontend is deployed
241+ webserver.ui.diskpath=./cruise-control-ui/dist/
242+ # URL path prefix for UI
243+ # (don't forget the ending *)
244+ webserver.ui.urlprefix=/*
245+ # Time after which a request is converted to async
246+ webserver.request.maxBlockTimeMs=10000
247+ # Default Session Expiry Period
248+ webserver.session.maxExpiryTimeMs=60000
249+ # Session cookie path
250+ webserver.session.path=/
251+ # Server Access Logs
252+ webserver.accesslog.enabled=true
253+ # Location of HTTP Request Logs
254+ webserver.accesslog.path=access.log
255+ # HTTP Request Log retention days
256+ webserver.accesslog.retention.days=14
257+ clusterConfig : |
258+ {
259+ "min.insync.replicas": 3
260+ }
49261 listenersConfig :
50262 internalListeners :
51263 - type : " plaintext"
0 commit comments