77use GuzzleHttp \Exception \GuzzleException ;
88use GuzzleHttp \Promise \Utils ;
99use Psr \Http \Message \ResponseInterface ;
10+ use function Workbunny \WebmanNacos \get_local_ip ;
1011use Workerman \Timer ;
1112use Workerman \Worker ;
1213
@@ -26,15 +27,28 @@ class InstanceRegistrarProcess extends AbstractProcess
2627 */
2728 protected array $ heartbeatTimers = [];
2829
30+ /**
31+ * 每个实例的连续心跳失败计数
32+ * @var array<string,int>
33+ */
34+ protected array $ heartbeatFailCount = [];
35+
2936 /**
3037 * @var float
3138 */
3239 protected float $ heartbeat ;
3340
41+ /**
42+ * 允许的最大连续心跳失败次数,超过才重启进程
43+ * @var int
44+ */
45+ protected int $ heartbeatFailMax ;
46+
3447 public function __construct ()
3548 {
3649 parent ::__construct ();
3750 $ this ->heartbeat = (float ) config ('plugin.workbunny.webman-nacos.app.instance_heartbeat ' , 5.0 );
51+ $ this ->heartbeatFailMax = (int ) config ('plugin.workbunny.webman-nacos.app.instance_heartbeat_fail_max ' , 3 );
3852 }
3953
4054 /**
@@ -44,50 +58,97 @@ public function __construct()
4458 */
4559 protected function _heartbeat (string $ name ): void
4660 {
47- if (isset ($ this ->instanceRegistrars [$ name ])) {
48- list ($ serviceName , $ ip , $ port , $ option ) = $ this ->instanceRegistrars [$ name ];
49- if (isset ($ option ['ephemeral ' ])) {
50- $ option ['ephemeral ' ] = (is_string ($ option ['ephemeral ' ]) ? filter_var ($ option ['ephemeral ' ], FILTER_VALIDATE_BOOLEAN , FILTER_NULL_ON_FAILURE ) : (bool ) $ option ['ephemeral ' ]);
61+ if (!isset ($ this ->instanceRegistrars [$ name ])) {
62+ return ;
63+ }
64+ $ instanceRegistrar = $ this ->instanceRegistrars [$ name ];
65+ $ serviceName = $ instanceRegistrar ['service_name ' ];
66+ $ ip = $ instanceRegistrar ['pod_ip ' ] ?: get_local_ip ();
67+ $ port = $ instanceRegistrar ['pod_port ' ];
68+ $ option = $ instanceRegistrar ['options ' ] ?? [];
69+
70+ // 关键修复:Nacos OpenAPI 的 ephemeral 默认值为 true(临时实例)
71+ // 当用户未显式指定时必须按 true 处理,否则会导致临时实例不发心跳而被 Nacos 摘除
72+ if (array_key_exists ('ephemeral ' , $ option )) {
73+ $ ephemeral = is_string ($ option ['ephemeral ' ])
74+ ? filter_var ($ option ['ephemeral ' ], FILTER_VALIDATE_BOOLEAN )
75+ : (bool ) $ option ['ephemeral ' ];
76+ } else {
77+ $ ephemeral = true ;
78+ }
79+ $ option ['ephemeral ' ] = $ ephemeral ;
80+
81+ // 仅对临时实例进行心跳(永久实例由服务端健康检查维持)
82+ if (!$ ephemeral ) {
83+ return ;
84+ }
85+
86+ $ this ->heartbeatFailCount [$ name ] = 0 ;
87+ $ this ->heartbeatTimers [$ name ] = Timer::add ($ this ->heartbeat , function () use ($ name , $ serviceName , $ ip , $ port , $ option ) {
88+ if ($ this ->is_stopping ) {
89+ return ;
5190 }
52- // 仅对非永久实例进行心跳
53- if ($ option ['ephemeral ' ] ?? false ) {
54- $ this ->heartbeatTimers [$ name ] = Timer::add ($ this ->heartbeat , function () use ($ name , $ serviceName , $ ip , $ port , $ option ) {
55- if ($ this ->is_stopping ) {
56- return ;
57- }
58- try {
59- if (!$ this ->client ->instance ->beat (
60- $ serviceName ,
61- array_filter ([
62- 'ip ' => $ ip ,
63- 'port ' => $ port ,
64- 'serviceName ' => $ serviceName ,
65- ] + $ option , fn ($ value ) => $ value !== null ),
66- $ option ['groupName ' ] ?? null ,
67- $ option ['namespaceId ' ] ?? null ,
68- $ option ['ephemeral ' ] ?? null ,
69- false ,
70- $ this ->heartbeat
71- )) {
72- $ this ->logger ()->error (
73- "Nacos instance heartbeat failed: [0] {$ this ->client ->instance ->getMessage ()}. " ,
74- ['name ' => $ name , 'trace ' => []]
75- );
76- $ this ->_stop ($ this ->retry_interval );
91+ // beat JSON 只保留 Nacos 识别的标准字段,避免非标字段污染
92+ $ beatData = array_filter ([
93+ 'ip ' => $ ip ,
94+ 'port ' => $ port ,
95+ 'serviceName ' => $ serviceName ,
96+ 'cluster ' => $ option ['clusterName ' ] ?? ($ option ['cluster ' ] ?? null ),
97+ 'weight ' => $ option ['weight ' ] ?? null ,
98+ 'metadata ' => $ option ['metadata ' ] ?? null ,
99+ 'scheduled ' => $ option ['scheduled ' ] ?? null ,
100+ ], fn ($ value ) => $ value !== null );
77101
78- return ;
79- }
80- } catch (GuzzleException $ exception ) {
81- $ this ->logger ()->error (
82- "Nacos instance heartbeat failed: [ {$ exception ->getCode ()}] {$ exception ->getMessage ()}. " ,
83- ['name ' => $ name , 'trace ' => $ exception ->getTrace ()]
84- );
85- $ this ->_stop ($ this ->retry_interval );
86-
87- return ;
88- }
89- });
102+ try {
103+ $ result = $ this ->client ->instance ->beat (
104+ $ serviceName ,
105+ $ beatData ,
106+ $ option ['groupName ' ] ?? null ,
107+ $ option ['namespaceId ' ] ?? null ,
108+ $ option ['ephemeral ' ] ?? null ,
109+ false ,
110+ $ this ->heartbeat
111+ );
112+ if ($ result === false ) {
113+ $ this ->_onHeartbeatFail (
114+ $ name ,
115+ "Nacos instance heartbeat failed: [0] {$ this ->client ->instance ->getMessage ()}. " ,
116+ []
117+ );
118+
119+ return ;
120+ }
121+ // 心跳成功,重置失败计数
122+ $ this ->heartbeatFailCount [$ name ] = 0 ;
123+ } catch (\Throwable $ exception ) {
124+ $ this ->_onHeartbeatFail (
125+ $ name ,
126+ "Nacos instance heartbeat failed: [ {$ exception ->getCode ()}] {$ exception ->getMessage ()}. " ,
127+ $ exception ->getTrace ()
128+ );
90129 }
130+ });
131+ }
132+
133+ /**
134+ * 心跳失败处理:累计 N 次失败才重启进程,避免偶发抖动导致雪崩
135+ * @param string $name
136+ * @param string $message
137+ * @param array $trace
138+ * @return void
139+ */
140+ protected function _onHeartbeatFail (string $ name , string $ message , array $ trace = []): void
141+ {
142+ $ this ->heartbeatFailCount [$ name ] = ($ this ->heartbeatFailCount [$ name ] ?? 0 ) + 1 ;
143+ $ count = $ this ->heartbeatFailCount [$ name ];
144+ $ this ->logger ()->error ($ message , [
145+ 'name ' => $ name ,
146+ 'fail_count ' => $ count ,
147+ 'max ' => $ this ->heartbeatFailMax ,
148+ 'trace ' => $ trace ,
149+ ]);
150+ if ($ count >= $ this ->heartbeatFailMax ) {
151+ $ this ->_stop ($ this ->retry_interval );
91152 }
92153 }
93154
@@ -103,7 +164,7 @@ public function onWorkerStart(Worker $worker)
103164 foreach ($ instanceRegistrars as $ name => $ instanceRegistrar ) {
104165 // 拆解配置
105166 $ serviceName = $ instanceRegistrar ['service_name ' ];
106- $ ip = $ instanceRegistrar ['pod_ip ' ];
167+ $ ip = $ instanceRegistrar ['pod_ip ' ] ?: get_local_ip () ;
107168 $ port = $ instanceRegistrar ['pod_port ' ];
108169 $ option = $ instanceRegistrar ['options ' ] ?? [];
109170 // 注册
@@ -153,18 +214,19 @@ public function onWorkerStop(Worker $worker)
153214 }
154215 // 拆解配置
155216 $ serviceName = $ instanceRegistrar ['service_name ' ];
156- $ ip = $ instanceRegistrar ['pod_ip ' ];
217+ $ ip = $ instanceRegistrar ['pod_ip ' ] ?: get_local_ip () ;
157218 $ port = $ instanceRegistrar ['pod_port ' ];
158219 $ option = $ instanceRegistrar ['options ' ] ?? [];
159- // 注销实例
220+ // 注销实例(groupName 未配置时使用 Nacos 默认的 DEFAULT_GROUP,避免 null 触发 TypeError)
160221 if (!$ this ->client ->instance ->delete (
161222 $ serviceName ,
162- $ option ['groupName ' ] ?? null ,
223+ ( string ) ( $ option ['groupName ' ] ?? ' DEFAULT_GROUP ' ) ,
163224 $ ip ,
164225 $ port ,
165226 [
166227 'namespaceId ' => $ option ['namespaceId ' ] ?? null ,
167228 'ephemeral ' => $ option ['ephemeral ' ] ?? null ,
229+ 'clusterName ' => $ option ['clusterName ' ] ?? null ,
168230 ]
169231 )) {
170232 $ this ->logger ()->error (
@@ -173,7 +235,7 @@ public function onWorkerStop(Worker $worker)
173235 );
174236 }
175237 }
176- } catch (GuzzleException $ exception ) {
238+ } catch (\ Throwable $ exception ) {
177239 $ this ->logger ()->error (
178240 "Nacos instance delete failed: [ {$ exception ->getCode ()}] {$ exception ->getMessage ()}. " ,
179241 ['name ' => '#base ' , 'trace ' => $ exception ->getTrace ()]
0 commit comments