Skip to content

Commit 0e3e2eb

Browse files
authored
Add new metrics (#578)
Co-authored-by: Krish <>
1 parent 7c2a3ca commit 0e3e2eb

File tree

10 files changed

+782
-85
lines changed

10 files changed

+782
-85
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,4 @@ benchmarks/dashboard-stack/package-lock.json
7171

7272
# virtual environment
7373
.venv/
74+
.cache/

include/aws/s3/private/s3_request.h

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,23 @@ struct aws_s3_request_metrics {
3131
struct aws_allocator *allocator;
3232

3333
struct {
34+
/* The time stamp when the request was first initiated by the client, including the very first attempt.
35+
* This timestamp is set once at the beginning and never updated during retries. Timestamps are from
36+
* `aws_high_res_clock_get_ticks`. This will always be available. */
37+
int64_t s3_request_first_attempt_start_timestamp_ns;
38+
39+
/* The time stamp when the request completely finished (success or final failure), including all retry
40+
* attempts. This is set when the request reaches its final state - either successful completion or
41+
* exhaustion of all retry attempts. Timestamps are from `aws_high_res_clock_get_ticks`. This will
42+
* be available with the last attempt. */
43+
int64_t s3_request_last_attempt_end_timestamp_ns;
44+
45+
/* The total time duration for the complete request lifecycle from initial start to final completion,
46+
* including all retry attempts, backoff delays, and connection establishment time
47+
* (s3_request_last_attempt_end_timestamp_ns - s3_request_first_attempt_start_timestamp_ns). This represents the
48+
* end-to-end user experience time. This will be available with the last attempt. */
49+
int64_t s3_request_total_duration_ns;
50+
3451
/* The time stamp when the request started by S3 client, which is prepared time by the client. Timestamps
3552
* are from `aws_high_res_clock_get_ticks`. This will always be available. */
3653
int64_t start_timestamp_ns;
@@ -90,6 +107,31 @@ struct aws_s3_request_metrics {
90107
/* The time duration for the request from start of delivery to finish of delivery (deliver_end_timestamp_ns -
91108
* deliver_start_timestamp_ns). When deliver_duration_ns is 0, means data not available. */
92109
int64_t deliver_duration_ns;
110+
111+
/* The time stamp when the request started to acquire connection. -1 means data not available. Timestamp
112+
* are from `aws_high_res_clock_get_ticks` */
113+
int64_t conn_acquire_start_timestamp_ns;
114+
/* The time stamp when the request finished to acquire connection. -1 means data not available.
115+
* Timestamp are from `aws_high_res_clock_get_ticks` */
116+
int64_t conn_acquire_end_timestamp_ns;
117+
/* The time duration for the request from start acquiring connection to finish acquiring connection
118+
* (conn_acquire_end_timestamp_ns - conn_acquire_start_timestamp_ns). When conn_acquire_end_timestamp_ns is -1,
119+
* means data not available. */
120+
int64_t conn_acquire_duration_ns;
121+
122+
/* The time stamp when the request started to delay for retry. -1 means data not available. Timestamp
123+
* are from `aws_high_res_clock_get_ticks` */
124+
int64_t retry_delay_start_timestamp_ns;
125+
/* The time stamp when the request finished to delay for retry. -1 means data not available.
126+
* Timestamp are from `aws_high_res_clock_get_ticks` */
127+
int64_t retry_delay_end_timestamp_ns;
128+
/* The time duration for the request from start retry delay to finish retry delay (retry_delay_end_timestamp_ns
129+
* - retry_delay_start_timestamp_ns). When retry_delay_end_timestamp_ns is -1, means data not available. */
130+
int64_t retry_delay_duration_ns;
131+
132+
/* The time duration for the service call from connection acquisition to first response byte. -1 means data not
133+
* available. */
134+
int64_t service_call_duration_ns;
93135
} time_metrics;
94136

95137
struct {
@@ -103,6 +145,8 @@ struct aws_s3_request_metrics {
103145
struct aws_string *host_address;
104146
/* The the request ID header value. */
105147
struct aws_string *request_id;
148+
/* The the extended request ID header value. */
149+
struct aws_string *extended_request_id;
106150
/* S3 operation name for the request */
107151
struct aws_string *operation_name;
108152
/* The type of request made */
@@ -113,7 +157,9 @@ struct aws_s3_request_metrics {
113157
/* The IP address of the request connected to */
114158
struct aws_string *ip_address;
115159
/* The pointer to the connection that request was made from */
116-
void *connection_id;
160+
void *connection_ptr;
161+
/* The pointer to the request that the request attempt was made from */
162+
void *request_ptr;
117163
/* The aws_thread_id_t to the thread that request ran on */
118164
aws_thread_id_t thread_id;
119165
/* The stream-id, which is the idex when the stream was activated. */
@@ -133,6 +179,13 @@ struct aws_s3_request {
133179
/* Linked list node used for queuing. */
134180
struct aws_linked_list_node node;
135181

182+
/* Timestamp when retry attempt started. Overwritten on each retry and copied to new attempt's setup data.
183+
* -1 means data not available. Timestamp from `aws_high_res_clock_get_ticks` */
184+
int64_t retry_start_timestamp_ns;
185+
/* Timestamp when retry attempt ended. Overwritten on each retry and copied to new attempt's setup data.
186+
* -1 means data not available. Timestamp from `aws_high_res_clock_get_ticks` */
187+
int64_t retry_end_timestamp_ns;
188+
136189
/* Linked list node used for tracking the request is active from HTTP level. */
137190
struct aws_linked_list_node cancellable_http_streams_list_node;
138191

@@ -250,7 +303,7 @@ struct aws_s3_request {
250303

251304
/* Returned request ID of this request. */
252305
struct aws_string *request_id;
253-
/* Returned amz ID 2 of this request. */
306+
/* Returned extended request id of this request. */
254307
struct aws_string *amz_id_2;
255308

256309
/* The metrics for the request telemetry */

include/aws/s3/s3_client.h

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,6 +1390,17 @@ int aws_s3_request_metrics_get_request_id(
13901390
const struct aws_s3_request_metrics *metrics,
13911391
const struct aws_string **out_request_id);
13921392

1393+
/**
1394+
* Get the extended request ID from aws_s3_request_metrics.
1395+
* If unavailable, AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised.
1396+
* If available, out_extended_request_id will be set to a string. Be warned this string's lifetime is tied to the
1397+
* metrics object.
1398+
**/
1399+
AWS_S3_API
1400+
int aws_s3_request_metrics_get_extended_request_id(
1401+
const struct aws_s3_request_metrics *metrics,
1402+
const struct aws_string **out_extended_request_id);
1403+
13931404
/* Get the start time from aws_s3_request_metrics, which is when S3 client prepare the request to be sent. Always
13941405
* available. Timestamp are from `aws_high_res_clock_get_ticks` */
13951406
AWS_S3_API
@@ -1563,10 +1574,14 @@ int aws_s3_request_metrics_get_ip_address(
15631574
const struct aws_s3_request_metrics *metrics,
15641575
const struct aws_string **out_ip_address);
15651576

1566-
/* Get the id of connection that request was made from. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data
1567-
* not available */
1577+
/* Get the ptr address of connection that request was made from. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised
1578+
* if data not available */
15681579
AWS_S3_API
1569-
int aws_s3_request_metrics_get_connection_id(const struct aws_s3_request_metrics *metrics, size_t *out_connection_id);
1580+
int aws_s3_request_metrics_get_connection_id(const struct aws_s3_request_metrics *metrics, size_t *out_connection_ptr);
1581+
1582+
/* Get the pointer to the request that attempt was made from. Always available. */
1583+
AWS_S3_API
1584+
int aws_s3_request_metrics_get_request_ptr(const struct aws_s3_request_metrics *metrics, size_t *out_request_ptr);
15701585

15711586
/* Get the thread ID of the thread that request was made from. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if
15721587
* data not available */
@@ -1604,6 +1619,70 @@ int aws_s3_request_metrics_get_error_code(const struct aws_s3_request_metrics *m
16041619
AWS_S3_API
16051620
uint32_t aws_s3_request_metrics_get_retry_attempt(const struct aws_s3_request_metrics *metrics);
16061621

1622+
/* Get the request start timestamp from aws_s3_request_metrics. Always available. */
1623+
AWS_S3_API
1624+
void aws_s3_request_metrics_get_s3_request_first_attempt_start_timestamp_ns(
1625+
const struct aws_s3_request_metrics *metrics,
1626+
uint64_t *out_s3_request_first_attempt_start_time);
1627+
1628+
/* Get the request end timestamp. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not
1629+
* available. */
1630+
AWS_S3_API
1631+
int aws_s3_request_metrics_get_s3_request_last_attempt_end_timestamp_ns(
1632+
const struct aws_s3_request_metrics *metrics,
1633+
uint64_t *out_s3_request_last_attempt_end_time);
1634+
1635+
/* Get the request duration. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not
1636+
* available. */
1637+
AWS_S3_API
1638+
int aws_s3_request_metrics_get_s3_request_total_duration_ns(
1639+
const struct aws_s3_request_metrics *metrics,
1640+
uint64_t *out_request_duration);
1641+
1642+
/* Get the connection acquire start timestamp. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not
1643+
* available. */
1644+
AWS_S3_API
1645+
int aws_s3_request_metrics_get_conn_acquire_start_timestamp_ns(
1646+
const struct aws_s3_request_metrics *metrics,
1647+
uint64_t *out_conn_acquire_start_time);
1648+
1649+
/* Get the connection acquire end timestamp. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not
1650+
* available. */
1651+
AWS_S3_API
1652+
int aws_s3_request_metrics_get_conn_acquire_end_timestamp_ns(
1653+
const struct aws_s3_request_metrics *metrics,
1654+
uint64_t *out_conn_acquire_end_time);
1655+
1656+
/* Get the connection acquire duration. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not available. */
1657+
AWS_S3_API
1658+
int aws_s3_request_metrics_get_conn_acquire_duration_ns(
1659+
const struct aws_s3_request_metrics *metrics,
1660+
uint64_t *out_conn_acquire_duration);
1661+
1662+
/* Get the retry delay start timestamp. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not available. */
1663+
AWS_S3_API
1664+
int aws_s3_request_metrics_get_retry_delay_start_timestamp_ns(
1665+
const struct aws_s3_request_metrics *metrics,
1666+
uint64_t *out_retry_delay_start_time);
1667+
1668+
/* Get the retry delay end timestamp. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not available. */
1669+
AWS_S3_API
1670+
int aws_s3_request_metrics_get_retry_delay_end_timestamp_ns(
1671+
const struct aws_s3_request_metrics *metrics,
1672+
uint64_t *out_retry_delay_end_time);
1673+
1674+
/* Get the retry delay duration. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not available. */
1675+
AWS_S3_API
1676+
int aws_s3_request_metrics_get_retry_delay_duration_ns(
1677+
const struct aws_s3_request_metrics *metrics,
1678+
uint64_t *out_retry_delay_duration);
1679+
1680+
/* Get the service call duration. AWS_ERROR_S3_METRIC_DATA_NOT_AVAILABLE will be raised if data not available. */
1681+
AWS_S3_API
1682+
int aws_s3_request_metrics_get_service_call_duration_ns(
1683+
const struct aws_s3_request_metrics *metrics,
1684+
uint64_t *out_service_call_duration);
1685+
16071686
AWS_EXTERN_C_END
16081687
AWS_POP_SANE_WARNING_LEVEL
16091688

source/s3_client.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2166,6 +2166,16 @@ void aws_s3_client_update_connections_threaded(struct aws_s3_client *client) {
21662166
} else if (!request->always_send && aws_s3_meta_request_has_finish_result(request->meta_request)) {
21672167
/* Unless the request is marked "always send", if this meta request has a finish result, then finish the
21682168
* request now and release it. */
2169+
/* Update the error code for the metrics of the request here since we never acquire/release a connection */
2170+
request->send_data.metrics->crt_info_metrics.error_code = AWS_ERROR_S3_CANCELED;
2171+
2172+
/* Record end times as well */
2173+
aws_high_res_clock_get_ticks(
2174+
(uint64_t *)&request->send_data.metrics->time_metrics.s3_request_last_attempt_end_timestamp_ns);
2175+
request->send_data.metrics->time_metrics.s3_request_total_duration_ns =
2176+
request->send_data.metrics->time_metrics.s3_request_last_attempt_end_timestamp_ns -
2177+
request->send_data.metrics->time_metrics.s3_request_first_attempt_start_timestamp_ns;
2178+
21692179
s_s3_client_meta_request_finished_request(client, request->meta_request, request, AWS_ERROR_S3_CANCELED);
21702180
request = aws_s3_request_release(request);
21712181
} else if (
@@ -2299,6 +2309,8 @@ static void s_s3_client_acquired_retry_token(
22992309
/* TODO: not a blocker, consider managing the life time of aws_s3_client from aws_s3_endpoint to simplify usage */
23002310
aws_s3_client_acquire(client);
23012311

2312+
aws_high_res_clock_get_ticks((uint64_t *)&request->send_data.metrics->time_metrics.conn_acquire_start_timestamp_ns);
2313+
23022314
client->vtable->acquire_http_connection(
23032315
endpoint->http_connection_manager, s_s3_client_on_acquire_http_connection, connection);
23042316

@@ -2356,6 +2368,11 @@ static void s_s3_client_on_acquire_http_connection(
23562368
goto error_retry;
23572369
}
23582370

2371+
aws_high_res_clock_get_ticks((uint64_t *)&request->send_data.metrics->time_metrics.conn_acquire_end_timestamp_ns);
2372+
request->send_data.metrics->time_metrics.conn_acquire_duration_ns =
2373+
request->send_data.metrics->time_metrics.conn_acquire_end_timestamp_ns -
2374+
request->send_data.metrics->time_metrics.conn_acquire_start_timestamp_ns;
2375+
23592376
connection->http_connection = incoming_http_connection;
23602377
aws_s3_meta_request_send_request(meta_request, connection);
23612378
aws_s3_client_release(client); /* kept since this callback was registered */
@@ -2455,6 +2472,7 @@ void aws_s3_client_notify_connection_finished(
24552472
}
24562473

24572474
/* Ask the retry strategy to schedule a retry of the request. */
2475+
aws_high_res_clock_get_ticks((uint64_t *)&request->retry_start_timestamp_ns);
24582476
if (aws_retry_strategy_schedule_retry(
24592477
connection->retry_token, error_type, s_s3_client_retry_ready, connection)) {
24602478

@@ -2476,6 +2494,12 @@ void aws_s3_client_notify_connection_finished(
24762494

24772495
reset_connection:
24782496

2497+
aws_high_res_clock_get_ticks(
2498+
(uint64_t *)&request->send_data.metrics->time_metrics.s3_request_last_attempt_end_timestamp_ns);
2499+
request->send_data.metrics->time_metrics.s3_request_total_duration_ns =
2500+
request->send_data.metrics->time_metrics.s3_request_last_attempt_end_timestamp_ns -
2501+
request->send_data.metrics->time_metrics.s3_request_first_attempt_start_timestamp_ns;
2502+
24792503
if (connection->retry_token != NULL) {
24802504
/* If we have a retry token and successfully finished, record that success. */
24812505
if (finish_code == AWS_S3_CONNECTION_FINISH_CODE_SUCCESS) {
@@ -2544,6 +2568,8 @@ static void s_s3_client_retry_ready(struct aws_retry_token *token, int error_cod
25442568
struct aws_s3_request *request = connection->request;
25452569
AWS_PRECONDITION(request);
25462570

2571+
aws_high_res_clock_get_ticks((uint64_t *)&request->retry_end_timestamp_ns);
2572+
25472573
struct aws_s3_meta_request *meta_request = request->meta_request;
25482574
AWS_PRECONDITION(meta_request);
25492575

source/s3_meta_request.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1380,6 +1380,10 @@ static int s_s3_meta_request_incoming_headers(
13801380
}
13811381
if (request->send_data.amz_id_2 == NULL && aws_byte_cursor_eq(name, &g_amz_id_2_header_name)) {
13821382
request->send_data.amz_id_2 = aws_string_new_from_cursor(connection->request->allocator, value);
1383+
if (collect_metrics) {
1384+
request->send_data.metrics->req_resp_info_metrics.extended_request_id =
1385+
aws_string_new_from_cursor(connection->request->allocator, value);
1386+
}
13831387
}
13841388
if (collect_metrics) {
13851389
aws_http_headers_add(request->send_data.metrics->req_resp_info_metrics.response_headers, *name, *value);
@@ -1518,13 +1522,19 @@ static void s_s3_meta_request_stream_metrics(
15181522
s3_metrics->time_metrics.send_end_timestamp_ns = http_metrics->send_end_timestamp_ns;
15191523
s3_metrics->time_metrics.sending_duration_ns = http_metrics->sending_duration_ns;
15201524
s3_metrics->time_metrics.receive_start_timestamp_ns = http_metrics->receive_start_timestamp_ns;
1525+
1526+
if (s3_metrics->time_metrics.receive_start_timestamp_ns != -1) {
1527+
s3_metrics->time_metrics.service_call_duration_ns = s3_metrics->time_metrics.receive_start_timestamp_ns -
1528+
s3_metrics->time_metrics.conn_acquire_start_timestamp_ns;
1529+
}
1530+
15211531
s3_metrics->time_metrics.receive_end_timestamp_ns = http_metrics->receive_end_timestamp_ns;
15221532
s3_metrics->time_metrics.receiving_duration_ns = http_metrics->receiving_duration_ns;
15231533

15241534
s3_metrics->crt_info_metrics.stream_id = http_metrics->stream_id;
15251535

15261536
/* Also related metrics from the request/response. */
1527-
s3_metrics->crt_info_metrics.connection_id = (void *)connection->http_connection;
1537+
s3_metrics->crt_info_metrics.connection_ptr = (void *)connection->http_connection;
15281538
const struct aws_socket_endpoint *endpoint = aws_http_connection_get_remote_endpoint(connection->http_connection);
15291539
request->send_data.metrics->crt_info_metrics.ip_address =
15301540
aws_string_new_from_c_str(request->allocator, endpoint->address);
@@ -1902,6 +1912,7 @@ static struct aws_s3_request_metrics *s_s3_request_finish_up_and_release_metrics
19021912
aws_high_res_clock_get_ticks((uint64_t *)&metrics->time_metrics.end_timestamp_ns);
19031913
metrics->time_metrics.total_duration_ns =
19041914
metrics->time_metrics.end_timestamp_ns - metrics->time_metrics.start_timestamp_ns;
1915+
aws_high_res_clock_get_ticks((uint64_t *)&metrics->time_metrics.s3_request_last_attempt_end_timestamp_ns);
19051916
}
19061917

19071918
if (meta_request->telemetry_callback != NULL) {

0 commit comments

Comments
 (0)