Commit 2bc0cdf

banmoy authored and mergify[bot] committed
[BugFix] Fix merge commit latency metrics overflow (#67168)

Signed-off-by: PengFei Li <[email protected]>
(cherry picked from commit 39bc4ea)

# Conflicts:
#	docs/en/administration/management/monitoring/metrics.md
#	docs/ja/administration/management/monitoring/metrics.md
#	docs/zh/administration/management/monitoring/metrics.md
1 parent ae7b46e commit 2bc0cdf

File tree

4 files changed: +616 -32 lines

be/src/runtime/batch_write/isomorphic_batch_write.cpp

Lines changed: 34 additions & 32 deletions

@@ -54,16 +54,18 @@ bvar::Adder<int64_t> g_mc_send_rpc_total("merge_commit", "send_rpc_total");
 bvar::Adder<int64_t> g_mc_register_pipe_total("merge_commit", "register_pipe_total");
 // Counter for stream load pipes unregistered for merge commit operations
 bvar::Adder<int64_t> g_mc_unregister_pipe_total("merge_commit", "unregister_pipe_total");
-// Latency recorder for end-to-end merge commit processing time (nanoseconds)
-bvar::LatencyRecorder g_mc_total_latency_ns("merge_commit", "reqeust");
-// Latency recorder for time tasks spend in pending queue before execution (nanoseconds)
-bvar::LatencyRecorder g_mc_pending_latency_ns("merge_commit", "pending");
-// Latency recorder for combined RPC request time and pipe availability wait time (nanoseconds)
-bvar::LatencyRecorder g_mc_wait_plan_latency_ns("merge_commit", "wait_plan");
-// Latency recorder for time spent appending data to stream load pipes (nanoseconds)
-bvar::LatencyRecorder g_mc_append_pipe_latency_ns("merge_commit", "append_pipe");
-// Latency recorder for time spent waiting for load operations to complete (nanoseconds)
-bvar::LatencyRecorder g_mc_wait_finish_latency_ns("merge_commit", "wait_finish");
+// Latency recorder for end-to-end merge commit processing time (microseconds)
+bvar::LatencyRecorder g_mc_total_latency_us("merge_commit", "request");
+// Latency recorder for time tasks spend in pending queue before execution (microseconds)
+bvar::LatencyRecorder g_mc_pending_latency_us("merge_commit", "pending");
+// Latency recorder for combined RPC request time and pipe availability wait time (microseconds)
+bvar::LatencyRecorder g_mc_wait_plan_latency_us("merge_commit", "wait_plan");
+// Latency recorder for time spent appending data to stream load pipes (microseconds)
+bvar::LatencyRecorder g_mc_append_pipe_latency_us("merge_commit", "append_pipe");
+// Latency recorder for time spent waiting for load operations to complete (microseconds)
+bvar::LatencyRecorder g_mc_wait_finish_latency_us("merge_commit", "wait_finish");
+
+#define NS_TO_US(x) ((x) / 1000)

 class AsyncAppendDataContext {
 public:
@@ -259,7 +261,7 @@ Status IsomorphicBatchWrite::append_data(StreamLoadContext* data_ctx) {
         } else {
             g_mc_fail_total << 1;
         }
-        g_mc_total_latency_ns << (MonotonicNanos() - start_time_ns);
+        g_mc_total_latency_us << NS_TO_US(MonotonicNanos() - start_time_ns);
     });
     if (_stopped.load(std::memory_order_acquire)) {
         return Status::ServiceUnavailable("Batch write is stopped");
@@ -296,14 +298,14 @@ Status IsomorphicBatchWrite::_create_and_wait_async_task(starrocks::StreamLoadCo
     async_ctx->total_cost_ns.store(MonotonicNanos() - async_ctx->create_time_ts);
     TRACE_BATCH_WRITE << "wait async finish, " << _batch_write_id << ", user label: " << async_ctx->data_ctx()->label
                       << ", user ip: " << data_ctx->auth.user_ip << ", data size: " << data_ctx->receive_bytes
-                      << ", total_cost: " << (async_ctx->total_cost_ns / 1000)
-                      << "us, total_async_cost: " << (async_ctx->total_async_cost_ns / 1000)
-                      << "us, task_pending_cost: " << (async_ctx->task_pending_cost_ns / 1000)
-                      << "us, append_pipe_cost: " << (async_ctx->append_pipe_cost_ns / 1000)
-                      << "us, rpc_cost: " << (async_ctx->rpc_cost_ns / 1000)
-                      << "us, wait_pipe_cost: " << (async_ctx->wait_pipe_cost_ns / 1000)
+                      << ", total_cost: " << NS_TO_US(async_ctx->total_cost_ns)
+                      << "us, total_async_cost: " << NS_TO_US(async_ctx->total_async_cost_ns)
+                      << "us, task_pending_cost: " << NS_TO_US(async_ctx->task_pending_cost_ns)
+                      << "us, append_pipe_cost: " << NS_TO_US(async_ctx->append_pipe_cost_ns)
+                      << "us, rpc_cost: " << NS_TO_US(async_ctx->rpc_cost_ns)
+                      << "us, wait_pipe_cost: " << NS_TO_US(async_ctx->wait_pipe_cost_ns)
                       << "us, num retries: " << async_ctx->num_retries
-                      << ", pipe_left_active: " << (async_ctx->pipe_left_active_ns / 1000)
+                      << ", pipe_left_active: " << NS_TO_US(async_ctx->pipe_left_active_ns)
                       << ", async_status: " << async_ctx->get_status() << ", txn_id: " << async_ctx->txn_id()
                       << ", label: " << async_ctx->label();
     data_ctx->txn_id = async_ctx->txn_id();
@@ -327,20 +329,20 @@ int IsomorphicBatchWrite::_execute_tasks(void* meta, bthread::TaskIterator<Task>
     ctx->task_pending_cost_ns.store(MonotonicNanos() - ctx->create_time_ts);
     g_mc_pending_total << -1;
     g_mc_pending_bytes << -ctx->data_ctx()->receive_bytes;
-    g_mc_pending_latency_ns << ctx->task_pending_cost_ns;
+    g_mc_pending_latency_us << NS_TO_US(ctx->task_pending_cost_ns);
     auto st = batch_write->_execute_write(ctx);
     ctx->finish_async(st);
     ctx->total_async_cost_ns.store(MonotonicNanos() - start_ts);
     TRACE_BATCH_WRITE << "async task finish, " << batch_write->_batch_write_id
                       << ", user label: " << ctx->data_ctx()->label
                       << ", data size: " << ctx->data_ctx()->receive_bytes
-                      << ", total_async_cost: " << (ctx->total_async_cost_ns / 1000)
-                      << "us, task_pending_cost: " << (ctx->task_pending_cost_ns / 1000)
-                      << "us, append_pipe_cost: " << (ctx->append_pipe_cost_ns / 1000)
-                      << "us, rpc_cost: " << (ctx->rpc_cost_ns / 1000)
-                      << "us, wait_pipe_cost: " << (ctx->wait_pipe_cost_ns / 1000)
+                      << ", total_async_cost: " << NS_TO_US(ctx->total_async_cost_ns)
+                      << "us, task_pending_cost: " << NS_TO_US(ctx->task_pending_cost_ns)
+                      << "us, append_pipe_cost: " << NS_TO_US(ctx->append_pipe_cost_ns)
+                      << "us, rpc_cost: " << NS_TO_US(ctx->rpc_cost_ns)
+                      << "us, wait_pipe_cost: " << NS_TO_US(ctx->wait_pipe_cost_ns)
                       << "us, num retries: " << ctx->num_retries
-                      << ", pipe_left_active: " << (ctx->pipe_left_active_ns / 1000) << "us, status: " << st
+                      << ", pipe_left_active: " << NS_TO_US(ctx->pipe_left_active_ns) << "us, status: " << st
                       << ", txn_id: " << ctx->txn_id() << ", label: " << ctx->label();
     ;
     AsyncAppendDataContext::release(ctx);
@@ -387,13 +389,13 @@ Status IsomorphicBatchWrite::_execute_write(AsyncAppendDataContext* async_ctx) {
     async_ctx->rpc_cost_ns.store(rpc_cost_ns);
     async_ctx->wait_pipe_cost_ns.store(wait_pipe_cost_ns);
     async_ctx->num_retries.store(num_retries);
-    g_mc_append_pipe_latency_ns << write_data_cost_ns;
-    g_mc_wait_plan_latency_ns << (rpc_cost_ns + wait_pipe_cost_ns);
+    g_mc_append_pipe_latency_us << NS_TO_US(write_data_cost_ns);
+    g_mc_wait_plan_latency_us << NS_TO_US(rpc_cost_ns + wait_pipe_cost_ns);
     if (!st.ok()) {
         std::stringstream stream;
         stream << "Failed to write data to stream load pipe, num retry: " << num_retries
-               << ", write_data: " << (write_data_cost_ns / 1000) << " us, rpc: " << (rpc_cost_ns / 1000)
-               << "us, wait_pipe: " << (wait_pipe_cost_ns / 1000) << " us, last error: " << st;
+               << ", write_data: " << NS_TO_US(write_data_cost_ns) << " us, rpc: " << NS_TO_US(rpc_cost_ns)
+               << "us, wait_pipe: " << NS_TO_US(wait_pipe_cost_ns) << " us, last error: " << st;
         st = Status::InternalError(stream.str());
     }
     return st;
@@ -464,7 +466,7 @@ Status IsomorphicBatchWrite::_send_rpc_request(StreamLoadContext* data_ctx) {
                                       config::merge_commit_rpc_reqeust_timeout_ms);
     TRACE_BATCH_WRITE << "receive requestBatchWrite response, " << _batch_write_id
                       << ", user label: " << data_ctx->label << ", master: " << master_addr
-                      << ", cost: " << ((MonotonicNanos() - start_ts) / 1000) << "us, status: " << st
+                      << ", cost: " << NS_TO_US(MonotonicNanos() - start_ts) << "us, status: " << st
                       << ", response: " << response;
 #else
     TEST_SYNC_POINT_CALLBACK("IsomorphicBatchWrite::send_rpc_request::request", &request);
@@ -490,10 +492,10 @@ Status IsomorphicBatchWrite::_wait_for_load_finish(StreamLoadContext* data_ctx)
     int64_t start_ts = MonotonicNanos();
     StatusOr<TxnState> status_or = subscriber->wait_finished_state(left_timeout_ms * 1000);
     data_ctx->mc_wait_finish_cost_nanos = MonotonicNanos() - start_ts;
-    g_mc_wait_finish_latency_ns << data_ctx->mc_wait_finish_cost_nanos;
+    g_mc_wait_finish_latency_us << NS_TO_US(data_ctx->mc_wait_finish_cost_nanos);
     TRACE_BATCH_WRITE << "finish to wait load, " << _batch_write_id << ", user label: " << data_ctx->label
                       << ", txn_id: " << data_ctx->txn_id << ", load label: " << data_ctx->batch_write_label
-                      << ", cost: " << (data_ctx->mc_wait_finish_cost_nanos / 1000)
+                      << ", cost: " << NS_TO_US(data_ctx->mc_wait_finish_cost_nanos)
                       << "us, wait status: " << status_or.status() << ", "
                       << (status_or.ok() ? status_or.value() : subscriber->current_state());
     if (!status_or.ok()) {
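
The arithmetic behind the overflow is worth spelling out. A minimal sketch, assuming bvar's percentile sampling keeps samples in 32 bits (an inference from the overflow this commit fixes, not a line taken from the bvar source): INT32_MAX nanoseconds is only about 2.1 seconds, so any slower request wraps to a garbage sample, while INT32_MAX microseconds covers roughly 35.8 minutes.

```cpp
#include <cstdint>
#include <iostream>

// The same conversion the patch introduces in isomorphic_batch_write.cpp.
#define NS_TO_US(x) ((x) / 1000)

int main() {
    // Hypothetical 3-second merge commit request, measured in nanoseconds.
    const int64_t latency_ns = 3'000'000'000LL;

    // Kept in 32 bits, the nanosecond sample wraps on typical two's-complement
    // platforms: INT32_MAX is 2'147'483'647 ns, i.e. about 2.1 seconds.
    const auto wrapped = static_cast<std::int32_t>(latency_ns);
    std::cout << "ns sample as int32: " << wrapped << '\n';      // -1294967296

    // The microsecond sample stays far below the 32-bit limit (~35.8 minutes).
    std::cout << "us sample: " << NS_TO_US(latency_ns) << '\n';  // 3000000
}
```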

docs/en/administration/management/monitoring/metrics.md

Lines changed: 192 additions & 0 deletions

@@ -1678,3 +1678,195 @@ For more information on how to build a monitoring service for your StarRocks clu
 
 - Unit: Count
 - Description: The number of times blacklisted sql have been intercepted.
+<<<<<<< HEAD
+=======
+
+### starrocks_fe_scheduled_pending_tablet_num
+
+- Unit: Count
+- Type: Instantaneous
+- Description: The number of FE-scheduled Clone tasks in Pending state, including both BALANCE and REPAIR types.
+
+### starrocks_fe_scheduled_running_tablet_num
+
+- Unit: Count
+- Type: Instantaneous
+- Description: The number of FE-scheduled Clone tasks in Running state, including both BALANCE and REPAIR types.
+
+### starrocks_fe_clone_task_total
+
+- Unit: Count
+- Type: Cumulative
+- Description: The total number of Clone tasks in the cluster.
+
+### starrocks_fe_clone_task_success
+
+- Unit: Count
+- Type: Cumulative
+- Description: The number of successfully executed Clone tasks in the cluster.
+
+### starrocks_fe_clone_task_copy_bytes
+
+- Unit: Bytes
+- Type: Cumulative
+- Description: The total file size copied by Clone tasks in the cluster, including both INTER_NODE and INTRA_NODE types.
+
+### starrocks_fe_clone_task_copy_duration_ms
+
+- Unit: ms
+- Type: Cumulative
+- Description: The total copy time consumed by Clone tasks in the cluster, including both INTER_NODE and INTRA_NODE types.
+
+### starrocks_be_clone_task_copy_bytes
+
+- Unit: Bytes
+- Type: Cumulative
+- Description: The total file size copied by Clone tasks on the BE node, including both INTER_NODE and INTRA_NODE types.
+
+### starrocks_be_clone_task_copy_duration_ms
+
+- Unit: ms
+- Type: Cumulative
+- Description: The total copy time consumed by Clone tasks on the BE node, including both INTER_NODE and INTRA_NODE types.
+
+### Transaction Latency Metrics
+
+The following metrics are `summary`-type metrics that provide latency distributions for different phases of a transaction. These metrics are reported exclusively by the Leader FE node.
+
+Each metric includes the following outputs:
+
+- **Quantiles**: Latency values at different percentile boundaries, exposed via the `quantile` label, which can take the values `0.75`, `0.95`, `0.98`, `0.99`, and `0.999`.
+- **`<metric_name>_sum`**: The total cumulative time spent in this phase, for example, `starrocks_fe_txn_total_latency_ms_sum`.
+- **`<metric_name>_count`**: The total number of transactions recorded for this phase, for example, `starrocks_fe_txn_total_latency_ms_count`.
+
+All transaction metrics share the following labels:
+
+- `type`: Categorizes transactions by their load job source type (for example, `all`, `stream_load`, `routine_load`). This allows monitoring of both overall transaction performance and the performance of specific load types. The reported groups can be configured via the FE parameter [`txn_latency_metric_report_groups`](../FE_configuration.md#txn_latency_metric_report_groups).
+- `is_leader`: Indicates whether the reporting FE node is the Leader. Only the Leader FE (`is_leader="true"`) reports actual metric values; Followers have `is_leader="false"` and report no data.
+
+#### starrocks_fe_txn_total_latency_ms
+
+- Unit: ms
+- Type: Summary
+- Description: The total latency for a transaction to complete, measured from the `prepare` time to the `finish` time. This metric represents the full end-to-end duration of a transaction.
+
+#### starrocks_fe_txn_write_latency_ms
+
+- Unit: ms
+- Type: Summary
+- Description: The latency of the `write` phase of a transaction, from `prepare` time to `commit` time. This metric isolates the performance of the data writing and preparation stage, before the transaction is ready to be published.
+
+#### starrocks_fe_txn_publish_latency_ms
+
+- Unit: ms
+- Type: Summary
+- Description: The latency of the `publish` phase, from `commit` time to `finish` time. This is the duration it takes for a committed transaction to become visible to queries. It is the sum of the `schedule`, `execute`, and `ack` sub-phases.
+
+#### starrocks_fe_txn_publish_schedule_latency_ms
+
+- Unit: ms
+- Type: Summary
+- Description: The time a transaction spends waiting to be published after it has been committed, measured from `commit` time to when the publish task is picked up. This metric reflects scheduling delays or queueing time in the `publish` pipeline.
+
+#### starrocks_fe_txn_publish_execute_latency_ms
+
+- Unit: ms
+- Type: Summary
+- Description: The active execution time of the `publish` task, from when the task is picked up to when it finishes. This metric represents the actual time spent making the transaction's changes visible.
+
+#### starrocks_fe_txn_publish_ack_latency_ms
+
+- Unit: ms
+- Type: Summary
+- Description: The final acknowledgment latency, from when the `publish` task finishes to the final `finish` time when the transaction is marked as `VISIBLE`. This metric includes any final steps or acknowledgments required.
+
+### Merge Commit BE Metrics
+
+#### merge_commit_request_total
+
+- Unit: Count
+- Type: Cumulative
+- Description: Total number of merge commit requests received by the BE.
+
+#### merge_commit_request_bytes
+
+- Unit: Bytes
+- Type: Cumulative
+- Description: Total bytes of data received across merge commit requests.
+
+#### merge_commit_success_total
+
+- Unit: Count
+- Type: Cumulative
+- Description: Merge commit requests that finished successfully.
+
+#### merge_commit_fail_total
+
+- Unit: Count
+- Type: Cumulative
+- Description: Merge commit requests that failed.
+
+#### merge_commit_pending_total
+
+- Unit: Count
+- Type: Instantaneous
+- Description: Merge commit tasks currently waiting in the execution queue.
+
+#### merge_commit_pending_bytes
+
+- Unit: Bytes
+- Type: Instantaneous
+- Description: Total bytes of data held by pending merge commit tasks.
+
+#### merge_commit_send_rpc_total
+
+- Unit: Count
+- Type: Cumulative
+- Description: RPC requests sent to the FE to start merge commit operations.
+
+#### merge_commit_register_pipe_total
+
+- Unit: Count
+- Type: Cumulative
+- Description: Stream load pipes registered for merge commit operations.
+
+#### merge_commit_unregister_pipe_total
+
+- Unit: Count
+- Type: Cumulative
+- Description: Stream load pipes unregistered from merge commit operations.
+
+Latency metrics expose percentile series such as `merge_commit_request_latency_99` and `merge_commit_request_latency_90`, reported in microseconds. The end-to-end latency obeys:
+
+`merge_commit_request = merge_commit_pending + merge_commit_wait_plan + merge_commit_append_pipe + merge_commit_wait_finish`
+
+> **Note**: Before v3.4.11, v3.5.12, and v4.0.4, these latency metrics were reported in nanoseconds.
+
+#### merge_commit_request
+
+- Unit: microsecond
+- Type: Summary
+- Description: End-to-end processing latency for merge commit requests.
+
+#### merge_commit_pending
+
+- Unit: microsecond
+- Type: Summary
+- Description: Time merge commit tasks spend waiting in the pending queue before execution.
+
+#### merge_commit_wait_plan
+
+- Unit: microsecond
+- Type: Summary
+- Description: Combined latency for the RPC request and waiting for the stream load pipe to become available.
+
+#### merge_commit_append_pipe
+
+- Unit: microsecond
+- Type: Summary
+- Description: Time spent appending data to the stream load pipe during merge commit.
+
+#### merge_commit_wait_finish
+
+- Unit: microsecond
+- Type: Summary
+- Description: Time spent waiting for merge commit load operations to finish.
+>>>>>>> 39bc4eacc4 ([BugFix] Fix merge commit latency metrics overflow (#67168))
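
Read together, the transaction-phase definitions added above imply a simple decomposition of end-to-end latency. This is a restatement of those definitions in LaTeX for clarity, not an extra guarantee from the docs:

```latex
% prepare --(write)--> commit --(publish)--> finish
\begin{aligned}
\text{txn\_total\_latency}   &= \text{txn\_write\_latency} + \text{txn\_publish\_latency} \\
\text{txn\_publish\_latency} &= \text{publish\_schedule\_latency} + \text{publish\_execute\_latency} + \text{publish\_ack\_latency}
\end{aligned}
```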

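The merge commit percentile series are produced on the BE by the bvar `LatencyRecorder`s shown in the C++ diff above. A minimal sketch of the mechanism, assuming the bvar library that ships with Apache bRPC; the `on_request_done` helper is illustrative, not StarRocks code:

```cpp
#include <cstdint>
#include <bvar/bvar.h>  // bvar, part of Apache bRPC

// Declaring a LatencyRecorder with prefix "merge_commit" and name "request"
// exports series including merge_commit_request_latency,
// merge_commit_request_latency_90, merge_commit_request_latency_99,
// merge_commit_request_max_latency, and merge_commit_request_qps.
bvar::LatencyRecorder g_mc_total_latency_us("merge_commit", "request");

// Illustrative helper: after the fix, every sample is divided down to
// microseconds before being recorded, keeping values within the range
// the recorder's percentile sampling can represent.
void on_request_done(int64_t start_time_ns, int64_t now_ns) {
    g_mc_total_latency_us << (now_ns - start_time_ns) / 1000;
}
```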