@@ -30,27 +30,27 @@ local DEFAULT_SEND_TIMEOUT = 1000
30
30
local DEFAULT_READ_TIMEOUT = 1000
31
31
local DEFAULT_HEALTH_DICT_NAME = " redis_cluster_health"
32
32
local err_unhealthy_master = " master node is unhealthy"
33
-
33
+ local health_check_running = false
34
34
--- Build the shared-dict key identifying one node.
-- @param name caller-supplied name (refresh_slots passes self.config.name)
-- @param ip   node address
-- @param port node port
-- @return name, ip and port joined by the separator literal
local function generate_key(name, ip, port)
    local name_part = name .. " :"
    local ip_part = ip .. " :"
    return name_part .. ip_part .. port
end
37
37
38
38
39
- local function health_check_timer (premature )
39
+ local function run_health_check (premature )
40
40
if premature then
41
41
return
42
42
end
43
43
44
- local health_dict = ngx .shared [DEFAULT_HEALTH_DICT_NAME ]
45
- if not health_dict then
44
+ local unhealthy_nodes_dict = ngx .shared [DEFAULT_HEALTH_DICT_NAME ]
45
+ if not unhealthy_nodes_dict then
46
46
return
47
47
end
48
48
49
- local all_keys = health_dict :get_keys ()
49
+ local all_keys = unhealthy_nodes_dict :get_keys ()
50
50
for _ , key in ipairs (all_keys ) do
51
51
local ip , port = string.match (key , " ^[^:]+:([^:]+):(%d+)$" )
52
52
if not ip or not port then
53
- health_dict :delete (key )
53
+ unhealthy_nodes_dict :delete (key )
54
54
goto continue
55
55
end
56
56
port = tonumber (port )
@@ -72,25 +72,26 @@ local function health_check_timer(premature)
72
72
end
73
73
-- Update health status based on check
74
74
if ok then
75
- health_dict :delete (key )
75
+ unhealthy_nodes_dict :delete (key )
76
76
ngx .log (ngx .WARN , " health check success for: " , ip , " :" , port )
77
77
else
78
- local failures = health_dict :get (key ) or 0
79
- health_dict :set (key , failures + 1 , 60 ) -- Unhealthy: increment failures with TTL
80
- ngx .log (ngx .WARN , " health check failed for: " , ip , " :" , port , " failures: " , failures + 1 )
78
+ local failures = unhealthy_nodes_dict :get (key ) or 0
79
+ unhealthy_nodes_dict :set (key , failures + 1 , 60 ) -- Unhealthy: increment failures with TTL
80
+ ngx .log (ngx .ERR , " health check failed for: " , ip , " :" , port , " - failures: " , failures + 1 )
81
81
end
82
82
83
83
:: continue::
84
84
end
85
85
end
86
86
87
+
87
88
--- Record one failure for a node in the shared health dictionary.
-- Does nothing when the shared dict named by DEFAULT_HEALTH_DICT_NAME is
-- not configured.
-- @param ip   node address
-- @param port node port
-- @param name name used as the key prefix (see generate_key)
local function track_node_failure(ip, port, name)
    local unhealthy_nodes_dict = ngx.shared[DEFAULT_HEALTH_DICT_NAME]
    if not unhealthy_nodes_dict then
        return
    end

    local key = generate_key(name, ip, port)
    -- incr(key, value, init, init_ttl): start the counter at 0 if the key
    -- is absent, with 60 passed as init_ttl.
    local newval, err = unhealthy_nodes_dict:incr(key, 1, 0, 60)
    if not newval then
        -- Previously the result was ignored, so a failed write (for
        -- example the dict being out of memory) went unnoticed.
        ngx.log(ngx.ERR, "failed to record node failure for ", key, ": ", err)
    end
end
95
96
96
97
local function parse_key (key_str )
@@ -113,15 +114,23 @@ local slot_cache = {}
113
114
local master_nodes = {}
114
115
115
116
117
--- Timer callback: run one health-check pass unless one is already running.
-- Uses the module-level flag health_check_running as a re-entrancy guard
-- and always resets it, even when run_health_check raises.
-- @param premature forwarded unchanged to run_health_check
local function health_check(premature)
    if health_check_running then
        return
    end

    health_check_running = true
    local ok, err = pcall(run_health_check, premature)
    health_check_running = false

    if not ok then
        -- The pcall result used to be discarded, hiding every runtime
        -- error raised during a check pass.
        ngx.log(ngx.ERR, "health check run failed: ", err)
    end
end
116
125
117
126
--- Tell whether a node is currently considered healthy.
-- A node is healthy when the shared health dict is unavailable, or when
-- its recorded failure count (0 if no entry) is at most 3.
-- @param ip   node address
-- @param port node port
-- @param name name used as the key prefix (see generate_key)
-- @return boolean
local function is_node_healthy(ip, port, name)
    local dict = ngx.shared[DEFAULT_HEALTH_DICT_NAME]
    if dict then
        local failure_count = dict:get(generate_key(name, ip, port)) or 0
        return failure_count <= 3
    end

    -- No dict configured: nothing is tracked, so treat the node as healthy.
    return true
end
127
136
@@ -342,18 +351,18 @@ function _M.refresh_slots(self)
342
351
343
352
self :fetch_slots ()
344
353
-- Cleanup health dict entries for removed nodes
345
- local health_dict = ngx .shared [DEFAULT_HEALTH_DICT_NAME ]
354
+ local unhealthy_nodes_dict = ngx .shared [DEFAULT_HEALTH_DICT_NAME ]
346
355
local current_nodes = {}
347
356
local servers = slot_cache [self .config .name .. " serv_list" ].serv_list
348
357
for _ , node in ipairs (servers ) do
349
358
local key = generate_key (self .config .name , node .ip , node .port )
350
359
current_nodes [key ] = true
351
360
end
352
361
-- Cleanup stale nodes
353
- local all_keys = health_dict :get_keys ()
362
+ local all_keys = unhealthy_nodes_dict :get_keys ()
354
363
for _ , key in ipairs (all_keys ) do
355
364
if not current_nodes [key ] then
356
- health_dict :delete (key )
365
+ unhealthy_nodes_dict :delete (key )
357
366
end
358
367
end
359
368
@@ -920,7 +929,7 @@ setmetatable(_M, {
920
929
})
921
930
922
931
--- Start the recurring health-check timer (fires every 1 second).
-- @return true on success; nil plus an error string when the timer could
--         not be created
function _M.init()
    local hdl, err = ngx.timer.every(1, health_check)
    if not hdl then
        -- Without this check a failed timer creation silently disabled
        -- health checking.
        ngx.log(ngx.ERR, "failed to create health check timer: ", err)
        return nil, err
    end

    return true
end
925
934
926
935
return _M
0 commit comments