Skip to content

Commit 8f039fc

Browse files
fix: review comments
1 parent d76429b commit 8f039fc

7 files changed

Lines changed: 171 additions & 68 deletions

File tree

constant/healthstatus.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package constant
2+
3+
// HealthStatus represents the possible health states of a service
4+
// or dependency within the system.
5+
//
6+
// It is used to standardize how health information is communicated across
7+
// services — for example, in HTTP health check endpoints (/ping).
8+
// Defining these values as constants ensures consistent usage throughout the codebase.
9+
type HealthStatus string
10+
11+
const (
12+
// HealthStatusHealthy indicates that the service is operating
13+
// normally, and all dependent subsystems are functioning as expected.
14+
15+
HealthStatusHealthy HealthStatus = "healthy"
16+
17+
// HealthStatusUnhealthy indicates that the component or service is
18+
// degraded or non-functional, possibly due to dependency failures,
19+
// connectivity issues, or internal errors.
20+
//
21+
// This status will cause readiness and liveness probes to fail
22+
HealthStatusUnhealthy HealthStatus = "unhealthy"
23+
)

health/registry.go

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,41 @@
1+
// Package health provides a simple health check framework that allows
2+
// different components or dependencies to register their own health check functions.
3+
// Each checker function returns an error to indicate an unhealthy state,
4+
// or nil to indicate that the component is healthy.
15
package health
26

7+
import "github.com/goto/raccoon/constant"
8+
9+
// Checker represents a function that performs a health check for a component.
10+
// It returns an error if the check fails (unhealthy), or nil if it passes (healthy).
311
type Checker func() error
412

513
var (
14+
// checkers stores all registered health check functions.
15+
// The key is the component name, and the value is its corresponding Checker function.
616
checkers = make(map[string]Checker)
717
)
818

19+
// Register adds a new health check function for a specific component.
20+
// Each component should have a unique name; calling Register with an existing
21+
// name will overwrite the previous checker.
922
func Register(name string, fn Checker) {
1023
checkers[name] = fn
1124
}
1225

13-
func CheckAll() map[string]string {
14-
results := make(map[string]string)
26+
// CheckAll runs all registered health check functions and returns their results.
27+
// It returns a map where the key is the component name and the value is its health status.
28+
// Components that return an error are marked as Unhealthy; others are marked as Healthy.
29+
func CheckAll() map[string]constant.HealthStatus {
30+
results := make(map[string]constant.HealthStatus)
31+
1532
for name, fn := range checkers {
1633
if err := fn(); err != nil {
17-
results[name] = "unhealthy: " + err.Error()
34+
results[name] = constant.HealthStatusUnhealthy
1835
} else {
19-
results[name] = "healthy"
36+
results[name] = constant.HealthStatusHealthy
2037
}
2138
}
39+
2240
return results
2341
}

health/registry_test.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"errors"
55
"reflect"
66
"testing"
7+
8+
"github.com/goto/raccoon/constant"
79
)
810

911
func TestRegister(t *testing.T) {
@@ -26,24 +28,24 @@ func TestCheckAll(t *testing.T) {
2628
tests := []struct {
2729
name string
2830
register map[string]Checker
29-
expected map[string]string
31+
expected map[string]constant.HealthStatus
3032
}{
3133
{
3234
name: "single healthy service on register map",
3335
register: map[string]Checker{
3436
"serviceA": func() error { return nil },
3537
},
36-
expected: map[string]string{
37-
"serviceA": "healthy",
38+
expected: map[string]constant.HealthStatus{
39+
"serviceA": constant.HealthStatusHealthy,
3840
},
3941
},
4042
{
4143
name: "single unhealthy service on register map",
4244
register: map[string]Checker{
4345
"serviceB": func() error { return errors.New("timeout") },
4446
},
45-
expected: map[string]string{
46-
"serviceB": "unhealthy: timeout",
47+
expected: map[string]constant.HealthStatus{
48+
"serviceB": constant.HealthStatusUnhealthy,
4749
},
4850
},
4951
{
@@ -52,16 +54,16 @@ func TestCheckAll(t *testing.T) {
5254
"ok": func() error { return nil },
5355
"fail": func() error { return errors.New("db down") },
5456
},
55-
expected: map[string]string{
56-
"ok": "healthy",
57-
"fail": "unhealthy: db down",
57+
expected: map[string]constant.HealthStatus{
58+
"ok": constant.HealthStatusHealthy,
59+
"fail": constant.HealthStatusUnhealthy,
5860
},
5961
},
6062
}
6163

6264
for _, tt := range tests {
6365
t.Run(tt.name, func(t *testing.T) {
64-
//rest the register map for every test scenario
66+
// reset the register map for every test scenario
6567
resetCheckers()
6668

6769
for name, fn := range tt.register {
@@ -76,6 +78,7 @@ func TestCheckAll(t *testing.T) {
7678
}
7779
}
7880

81+
// resetCheckers clears all registered checkers to ensure isolation between tests.
7982
func resetCheckers() {
8083
checkers = make(map[string]Checker)
8184
}

services/mqtt/client/pubsub.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/gojekfarm/xtools/xproto"
99
"github.com/goto/raccoon/config"
1010
"github.com/goto/raccoon/logger"
11+
"github.com/goto/raccoon/metrics"
1112
"io"
1213
)
1314

@@ -17,13 +18,30 @@ type MqttPubSubClient struct {
1718
resolver resolver
1819
}
1920

21+
// client defines the minimal interface for a courier MQTT client instance.
22+
// It abstracts basic lifecycle operations and connection state management.
2023
type client interface {
24+
// Start begins the client’s operation and establishes connections
25+
// to the MQTT broker or messaging backend.
2126
Start() error
27+
28+
// Stop terminates the client’s operation gracefully, releasing
29+
// any resources and closing network connections.
2230
Stop()
31+
32+
// IsConnected reports whether the client is currently connected
33+
// to the MQTT broker.
2334
IsConnected() bool
2435
}
2536

37+
// resolver defines the interface for a service discovery resolver
38+
// responsible for discovering and updating MQTT broker endpoints dynamically.
39+
//
40+
// Implementations like ConsulResolver use this interface to periodically
41+
// fetch and update broker addresses for high availability and fault tolerance.
2642
type resolver interface {
43+
// Start initiates the resolver’s background process to watch for
44+
// endpoint changes.
2745
Start()
2846
}
2947

@@ -78,6 +96,10 @@ func registerHandler(ctx context.Context, handler courier.MessageHandler) func(c
7896
return func(ps courier.PubSub) {
7997
topic := config.ServerMQTT.ConsumerConfig.TopicFormat
8098
if err := ps.Subscribe(ctx, topic, handler, courier.QOSZero); err != nil {
99+
metrics.Increment(
100+
"mqtt_error",
101+
fmt.Sprintf("reason=subscribe_failed"),
102+
)
81103
logger.Errorf("failed to register MQTT handler for topic %q: %v", topic, err)
82104
} else {
83105
logger.Infof("successfully registered MQTT handler for topic %q", topic)
@@ -89,6 +111,10 @@ func registerHandler(ctx context.Context, handler courier.MessageHandler) func(c
89111
func (m *MqttPubSubClient) Start() error {
90112
go m.resolver.Start()
91113
if err := m.client.Start(); err != nil {
114+
metrics.Increment(
115+
"mqtt_error",
116+
fmt.Sprintf("reason=start_failed"),
117+
)
92118
logger.Infof("MQTT client start failed due to %v", err)
93119
return fmt.Errorf("failed to start MQTT client: %w", err)
94120
}

services/mqtt/handler.go

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ import (
44
pb "buf.build/gen/go/gotocompany/proton/protocolbuffers/go/gotocompany/raccoon/v1beta1"
55
"context"
66
"fmt"
7-
"github.com/goto/raccoon/clients/go/log"
8-
"github.com/goto/raccoon/serialization"
97
"time"
108

119
"github.com/gojek/courier-go"
10+
"github.com/goto/raccoon/clients/go/log"
1211
"github.com/goto/raccoon/collection"
1312
"github.com/goto/raccoon/config"
1413
"github.com/goto/raccoon/identification"
1514
"github.com/goto/raccoon/metrics"
15+
"github.com/goto/raccoon/serialization"
1616
"google.golang.org/protobuf/proto"
1717
)
1818

@@ -25,69 +25,81 @@ type Handler struct {
2525
// and sends them to the Collector.
2626
func (h *Handler) MQTTHandler(ctx context.Context, c courier.PubSub, message *courier.Message) {
2727
start := time.Now()
28-
29-
identifier := identification.Identifier{
30-
Group: config.ServerMQTT.ConnGroup,
31-
}
28+
group := config.ServerMQTT.ConnGroup
3229
log.Infof("MQTT message received with content %v", message)
3330

3431
var req pb.SendEventRequest
3532
if err := message.DecodePayload(&req); err != nil {
36-
metrics.Increment(
37-
"batches_read_total",
38-
fmt.Sprintf("status=failed,conn_group=%s,reason=serde", identifier.Group),
39-
)
40-
log.Errorf("mqtt message decoding failed due to : %v", err)
33+
h.recordMetrics("request", fmt.Sprintf("status=failed,conn_group=%s,reason=serde", group), nil)
34+
log.Errorf("mqtt message decoding failed: %v", err)
4135
return
4236
}
4337

4438
if proto.Equal(&req, &pb.SendEventRequest{}) {
45-
metrics.Increment(
46-
"batches_read_total",
47-
fmt.Sprintf("status=failed,conn_group=%s,reason=empty", identifier.Group),
48-
)
39+
h.recordMetrics("request", fmt.Sprintf("status=failed,conn_group=%s,reason=empty", group), nil)
4940
log.Errorf("mqtt request message according proto format is empty")
5041
return
5142
}
5243

53-
//to be removed post end-to-end test
44+
// Debug — can be removed after E2E verification
5445
for _, event := range req.Events {
55-
log.Infof("MQTT message content post deserialization event : %v", event)
46+
log.Infof("MQTT message content post deserialization event: %v", event)
5647
}
57-
log.Infof("MQTT message request id %v", req.ReqGuid)
48+
log.Infof("MQTT message request id: %v", req.ReqGuid)
5849

59-
//instrument the request number
60-
metrics.Increment(
61-
"batches_read_total",
62-
fmt.Sprintf("status=success,conn_group=%s", identifier.Group),
63-
)
64-
//instrument the request size
50+
// Serialize to compute request size
6551
reqBytes, err := serialization.SerializeProto(&req)
6652
if err != nil {
67-
log.Errorf("mqtt message serialization failed : %v", err)
68-
} else {
69-
metrics.Count("request_bytes_total", len(reqBytes),
70-
fmt.Sprintf("conn_group=%s", identifier.Group))
53+
log.Errorf("mqtt message serialization failed: %v", err)
7154
}
7255

73-
h.recordEventMetrics(req.Events, identifier.Group)
56+
// Record all metrics via generic function
57+
h.recordMetrics("request", fmt.Sprintf("status=success,conn_group=%s", group), reqBytes)
58+
h.recordMetrics("event", fmt.Sprintf("conn_group=%s", group), req.Events)
7459

7560
h.Collector.Collect(ctx, &collection.CollectRequest{
76-
ConnectionIdentifier: identifier,
61+
ConnectionIdentifier: identification.Identifier{Group: group},
7762
TimeConsumed: start,
7863
SendEventRequest: &req,
7964
AckFunc: nil,
8065
})
8166
}
8267

83-
// recordEventMetrics updates per-event metrics like byte size and event count.
84-
func (h *Handler) recordEventMetrics(events []*pb.Event, group string) {
68+
// recordMetrics is a generic entry function that routes metric recording
69+
// based on metricName.
70+
func (h *Handler) recordMetrics(metricName string, tags string, data any) {
71+
switch metricName {
72+
case "request":
73+
h.recordRequestMetrics(tags, data)
74+
case "event":
75+
h.recordEventMetrics(tags, data)
76+
default:
77+
log.Errorf("unknown metricName=%s ignored", metricName)
78+
}
79+
}
80+
81+
// recordRequestMetrics captures request-level metrics (success/failure, bytes).
82+
func (h *Handler) recordRequestMetrics(tags string, data any) {
83+
metrics.Increment("batches_read_total", tags)
84+
85+
if reqBytes, ok := data.([]byte); ok && len(reqBytes) > 0 {
86+
metrics.Count("request_bytes_total", len(reqBytes), tags)
87+
}
88+
}
89+
90+
// recordEventMetrics captures per-event metrics like count and size.
91+
func (h *Handler) recordEventMetrics(tags string, data any) {
92+
events, ok := data.([]*pb.Event)
93+
if !ok {
94+
return
95+
}
96+
8597
for _, e := range events {
8698
if e == nil {
8799
continue
88100
}
89-
tags := fmt.Sprintf("conn_group=%s,event_type=%s", group, e.Type)
90-
metrics.Count("events_rx_bytes_total", len(e.EventBytes), tags)
91-
metrics.Increment("events_rx_total", tags)
101+
eventTags := fmt.Sprintf("%s,event_type=%s", tags, e.Type)
102+
metrics.Increment("events_rx_total", eventTags)
103+
metrics.Count("events_rx_bytes_total", len(e.EventBytes), eventTags)
92104
}
93105
}

0 commit comments

Comments
 (0)