Skip to content

Commit d50a151

Browse files
shumkov, ktechmidas, vivekgsharma, ktechmidas
authored
feat: system and docker metrics in kibana (#617)
* feat: load test nodes * fix: increase soft ulimit as well * feat: metrics provisioning * feat: deploy load test nodes with ansible * fix: ansible host to run on * chore: default to zero for LT nodes * chore: instance type change * feat: initial prometheus implementation in tf * refactor: rename prometheus to metrics nodes * chore: accomplish metrics and load test roles * chore: LT nodes should be AMD * chore: temp fixes * fix: duplicate prometheus port * chore: install latest protoc * fix: invalid .env * chore: redo insight API url * fix: change to http * fix: use dash_devnet_name var instead * chore: restore deploy playbook * modified_network_configs * chore: configure tenderdash mempool cache size * chore: update ouzo * chore: uncomment code * chore: added main_domain var to ansible initial config * chore: configure tenderdash mempool cache size * chore: update ouzo * chore: uncomment code * chore: linter fixes * chore: stop complaining linter * chore: set metrics/lt to always 1 * feat: system and docker metrics in kibana * chore: commented code * chore: update dashmate config * Move 'Fund load tester nodes' block above 'Activate Sporks' in deploy.yml * chore: fix CI * clean up * chore: update to platform 1.0-dev.9 * fix: linter * fix: linter again --------- Co-authored-by: ktechmidas <[email protected]> Co-authored-by: vivekgsharma <[email protected]> Co-authored-by: ktechmidas <[email protected]>
1 parent 0ca3218 commit d50a151

File tree

6 files changed

+89
-11
lines changed

6 files changed

+89
-11
lines changed

ansible/deploy.yml

+9-8
Original file line numberDiff line numberDiff line change
@@ -236,14 +236,6 @@
236236
- role: mn_protx_config
237237
mnlist: "{{ hp_masternodes }}"
238238

239-
- name: Activate sporks
240-
hosts: wallet_nodes
241-
become: true
242-
roles:
243-
- role: activate_dashd_sporks
244-
when: dash_network != "mainnet" and dash_network != "testnet"
245-
delegate_to: '{{ play_hosts | first }}'
246-
247239

248240
- name: Fund load tester nodes
249241
hosts: wallet_nodes
@@ -259,6 +251,15 @@
259251
payment_targets: "{{ load_tester_addresses }}"
260252
amount: "{{ load_tester_wallet_amount }}"
261253

254+
255+
- name: Activate sporks
256+
hosts: wallet_nodes
257+
become: true
258+
roles:
259+
- role: activate_dashd_sporks
260+
when: dash_network != "mainnet" and dash_network != "testnet"
261+
delegate_to: '{{ play_hosts | first }}'
262+
262263
# TODO: this partially working code causes errors during deploy; commented out for now, fix later
263264
# - name: Create governance proposals
264265
# hosts: wallet_nodes

ansible/requirements.yml

+3
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,8 @@ roles:
3838
- src: geerlingguy.pip
3939
version: 2.2.0
4040

41+
- src: elastic.beats
42+
version: v7.17.0
43+
4144
- src: ktechmidas.openvpn
4245
version: 1.0.0

ansible/roles/dashmate/defaults/main.yml

+5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ dashmate_platform_drive_tenderdash_mempool_max_txs_bytes: 536870912 # 500Mb, def
2828
dashmate_platform_drive_tenderdash_rpc_max_open_connections: 900
2929
dashmate_platform_drive_tenderdash_consensus_peer_gossip_sleep_duration: "100ms"
3030
dashmate_platform_drive_tenderdash_consensus_peer_query_maj23_sleep_duration: "2s"
31+
dashmate_platform_drive_tenderdash_mempool_timeout_check_tx: "1s"
32+
dashmate_platform_drive_tenderdash_mempool_tx_enqueue_timeout: "10ms"
33+
dashmate_platform_drive_tenderdash_mempool_tx_send_rate_limit: 10
34+
dashmate_platform_drive_tenderdash_mempool_tx_recv_rate_limit: 12
35+
dashmate_platform_drive_tenderdash_rpc_timeout_broadcast_tx: "1s"
3136
# Unsafe params. Use only if you understand what you are doing.
3237
dashmate_platform_drive_tenderdash_consensus_unsafe_override_propose_timeout:
3338
dashmate_platform_drive_tenderdash_consensus_unsafe_override_propose_delta:

ansible/roles/dashmate/templates/dashmate.json.j2

+13-2
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,12 @@
179179
"color": null
180180
}{% endif %}
181181
},
182+
"tokioConsole": {
183+
"enabled": false,
184+
"host": "127.0.0.1",
185+
"port": 6669,
186+
"retention_secs": 180
187+
},
182188
"validatorSet": {
183189
"llmqType": {{ platform_drive_validator_set_llmq_type }}
184190
},
@@ -215,12 +221,17 @@
215221
"mempool": {
216222
"cacheSize": {{dashmate_platform_drive_tenderdash_mempool_cache_size}},
217223
"size": {{dashmate_platform_drive_tenderdash_mempool_size}},
218-
"maxTxsBytes": {{dashmate_platform_drive_tenderdash_mempool_max_txs_bytes}}
224+
"maxTxsBytes": {{dashmate_platform_drive_tenderdash_mempool_max_txs_bytes}},
225+
"timeoutCheckTx": "{{dashmate_platform_drive_tenderdash_mempool_timeout_check_tx}}",
226+
"txEnqueueTimeout": "{{dashmate_platform_drive_tenderdash_mempool_tx_enqueue_timeout}}",
227+
"txSendRateLimit": {{dashmate_platform_drive_tenderdash_mempool_tx_send_rate_limit}},
228+
"txRecvRateLimit": {{dashmate_platform_drive_tenderdash_mempool_tx_recv_rate_limit}}
219229
},
220230
"rpc": {
221231
"host": "127.0.0.1",
222232
"port": {{ tendermint_rpc_port }},
223-
"maxOpenConnections": {{dashmate_platform_drive_tenderdash_rpc_max_open_connections}}
233+
"maxOpenConnections": {{dashmate_platform_drive_tenderdash_rpc_max_open_connections}},
234+
"timeoutBroadcastTx": "{{dashmate_platform_drive_tenderdash_rpc_timeout_broadcast_tx}}"
224235
},
225236
"pprof": {
226237
"enabled": {% if dashmate_platform_tenderdash_pprof_enable %}true{% else %}false{% endif %},

ansible/roles/elastic_beats/tasks/main.yml

+59
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,62 @@
5757
- name: Set up filebeat log monitoring
5858
ansible.builtin.include_role:
5959
name: geerlingguy.filebeat
60+
61+
# TODO: Make sure we have a retention policy for metrics
62+
- name: Set up metricbeat
63+
ansible.builtin.include_role:
64+
name: elastic.beats
65+
vars:
66+
beats_version: "{{ elastic_version }}"
67+
beat: metricbeat
68+
beat_conf:
69+
setup:
70+
dashboards:
71+
enabled: true
72+
kibana:
73+
host: "{{ hostvars['logs-1'].private_ip }}:5601"
74+
username: "{{ elastic_username }}"
75+
password: "{{ elastic_password }}"
76+
metricbeat:
77+
modules:
78+
- module: system
79+
metricsets:
80+
- cpu # CPU usage
81+
- load # CPU load averages
82+
- memory # Memory usage
83+
- network # Network IO
84+
- process # Per process metrics
85+
- process_summary # Process summary
86+
- uptime # System Uptime
87+
- socket_summary # Socket summary
88+
- core # Per CPU core usage
89+
- diskio # Disk IO
90+
- fsstat # File system summary metrics
91+
- socket # Sockets and connection info (linux only)
92+
enabled: true
93+
period: 10s
94+
processes: ['.*']
95+
96+
# Configure the metric types that are included by these metricsets.
97+
cpu.metrics: ["percentages", "normalized_percentages"] # The other available option is ticks.
98+
core.metrics: ["percentages"] # The other available option is ticks.
99+
- module: docker
100+
metricsets:
101+
- "container"
102+
- "cpu"
103+
- "diskio"
104+
- "event"
105+
- "healthcheck"
106+
- "info"
107+
- "memory"
108+
- "network"
109+
# - "network_summary"
110+
hosts: ["unix:///var/run/docker.sock"]
111+
period: 10s
112+
enabled: true
113+
output_conf:
114+
elasticsearch:
115+
hosts:
116+
- "{{ hostvars['logs-1'].private_ip }}:9200"
117+
username: "{{ elastic_username }}"
118+
password: "{{ elastic_password }}"

lib/configGenerator/generateAnsibleConfig.js

-1
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ async function generateAnsibleConfig(
216216
for (let i = 1; i <= hpMasternodesCount; i++) {
217217
config.hp_masternodes[`hp-masternode-${i}`] = await generateDip3Keys(network);
218218
}
219-
220219
// Tenderdash keys
221220

222221
function generateTenderdashNodeKeys() {

0 commit comments

Comments
 (0)