@@ -2,6 +2,9 @@ use bytes::Bytes;
22
33use log:: debug;
44use log:: error;
5+ use log:: warn;
6+
7+ use std:: time:: Duration ;
58
69use tokio:: sync:: oneshot:: channel;
710
@@ -41,7 +44,14 @@ async fn handle_request(data: Data<AppState>, req: HttpRequest, body: Bytes) ->
4144
4245 // Expect x-request-id header
4346 let request_id = match req. headers ( ) . get ( "x-request-id" ) {
44- Some ( value) => value. to_str ( ) . unwrap ( ) ,
47+ Some ( value) => match value. to_str ( ) {
48+ Ok ( s) => s,
49+ Err ( _) => {
50+ return HttpResponse :: BadRequest ( )
51+ . content_type ( "text/plain" )
52+ . body ( "Invalid x-request-id header encoding" ) ;
53+ }
54+ } ,
4555 None => {
4656 return HttpResponse :: BadRequest ( )
4757 . content_type ( "text/plain" )
@@ -50,17 +60,35 @@ async fn handle_request(data: Data<AppState>, req: HttpRequest, body: Bytes) ->
5060 } ;
5161
5262 let host = match req. headers ( ) . get ( "host" ) {
53- Some ( value) => Some ( value. to_str ( ) . unwrap ( ) . to_string ( ) ) ,
63+ Some ( value) => match value. to_str ( ) {
64+ Ok ( s) => Some ( s. to_string ( ) ) ,
65+ Err ( _) => {
66+ error ! ( "Invalid host header encoding" ) ;
67+ None
68+ }
69+ } ,
5470 None => None ,
5571 } ;
5672
5773 let mut worker_id = match req. headers ( ) . get ( "x-worker-id" ) {
58- Some ( value) => Some ( value. to_str ( ) . unwrap ( ) . to_string ( ) ) ,
74+ Some ( value) => match value. to_str ( ) {
75+ Ok ( s) => Some ( s. to_string ( ) ) ,
76+ Err ( _) => {
77+ error ! ( "Invalid x-worker-id header encoding" ) ;
78+ None
79+ }
80+ } ,
5981 None => None ,
6082 } ;
6183
6284 let mut worker_name = match req. headers ( ) . get ( "x-worker-name" ) {
63- Some ( value) => Some ( value. to_str ( ) . unwrap ( ) . to_string ( ) ) ,
85+ Some ( value) => match value. to_str ( ) {
86+ Ok ( s) => Some ( s. to_string ( ) ) ,
87+ Err ( _) => {
88+ error ! ( "Invalid x-worker-name header encoding" ) ;
89+ None
90+ }
91+ } ,
6492 None => None ,
6593 } ;
6694
@@ -117,7 +145,7 @@ async fn handle_request(data: Data<AppState>, req: HttpRequest, body: Bytes) ->
117145
118146 // Create a new request to forward to the worker.
119147 let request = {
120- let mut request: http_v02:: Request < Bytes > = http_v02:: Request :: builder ( )
148+ let mut request: http_v02:: Request < Bytes > = match http_v02:: Request :: builder ( )
121149 . uri ( format ! (
122150 "{}://{}{}" ,
123151 req. connection_info( ) . scheme( ) ,
@@ -126,7 +154,15 @@ async fn handle_request(data: Data<AppState>, req: HttpRequest, body: Bytes) ->
126154 ) )
127155 . method ( req. method ( ) )
128156 . body ( body)
129- . unwrap ( ) ;
157+ {
158+ Ok ( r) => r,
159+ Err ( e) => {
160+ error ! ( "Failed to build request: {}" , e) ;
161+ return HttpResponse :: InternalServerError ( )
162+ . content_type ( "text/plain" )
163+ . body ( "Failed to build forwarded request" ) ;
164+ }
165+ } ;
130166
131167 // Copy headers from the incoming request to the forwarded request.
132168 let headers = request. headers_mut ( ) ;
@@ -136,18 +172,26 @@ async fn handle_request(data: Data<AppState>, req: HttpRequest, body: Bytes) ->
136172
137173 // If the worker id is not provided, we add it to the headers.
138174 if req. headers ( ) . get ( "x-worker-id" ) . is_none ( ) {
139- headers. insert (
140- "x-worker-id" ,
141- http_v02:: HeaderValue :: from_str ( & worker. id ) . unwrap ( ) ,
142- ) ;
175+ match http_v02:: HeaderValue :: from_str ( & worker. id ) {
176+ Ok ( header_value) => {
177+ headers. insert ( "x-worker-id" , header_value) ;
178+ }
179+ Err ( e) => {
180+ error ! ( "Invalid worker id for header: {}" , e) ;
181+ }
182+ }
143183 }
144184
145185 // If the worker name is not provided, we add it to the headers.
146186 if req. headers ( ) . get ( "x-worker-name" ) . is_none ( ) {
147- headers. insert (
148- "x-worker-name" ,
149- http_v02:: HeaderValue :: from_str ( & worker. name ) . unwrap ( ) ,
150- ) ;
187+ match http_v02:: HeaderValue :: from_str ( & worker. name ) {
188+ Ok ( header_value) => {
189+ headers. insert ( "x-worker-name" , header_value) ;
190+ }
191+ Err ( e) => {
192+ error ! ( "Invalid worker name for header: {}" , e) ;
193+ }
194+ }
151195 }
152196
153197 request
@@ -179,8 +223,9 @@ async fn handle_request(data: Data<AppState>, req: HttpRequest, body: Bytes) ->
179223 } ;
180224
181225 let ( res_tx, res_rx) = channel :: < http_v02:: Response < Bytes > > ( ) ;
226+ let ( termination_tx, termination_rx) = channel :: < openworkers_runner:: TerminationReason > ( ) ;
182227
183- openworkers_runner:: event_fetch:: run_fetch ( worker, request, res_tx, data. log_tx . clone ( ) , permit) ;
228+ openworkers_runner:: event_fetch:: run_fetch ( worker, request, res_tx, termination_tx , data. log_tx . clone ( ) , permit) ;
184229
185230 let response = match res_rx. await {
186231 Ok ( res) => {
@@ -192,9 +237,44 @@ async fn handle_request(data: Data<AppState>, req: HttpRequest, body: Bytes) ->
192237
193238 rb. body ( res. body ( ) . clone ( ) )
194239 }
195- Err ( err) => {
196- error ! ( "worker fetch error: {}, ensure the worker registered a listener for the 'fetch' event" , err) ;
197- HttpResponse :: InternalServerError ( ) . body ( err. to_string ( ) )
240+ Err ( _) => {
241+ // Worker didn't send a response, check termination reason
242+ use openworkers_runner:: TerminationReason ;
243+
244+ let reason = termination_rx. await . unwrap_or ( TerminationReason :: Exception ) ;
245+
246+ error ! ( "worker terminated without sending response: {:?}" , reason) ;
247+
248+ let status = reason. http_status ( ) ;
249+ let body = match reason {
250+ TerminationReason :: Success => {
251+ // This shouldn't happen - worker completed but didn't send response
252+ "Worker completed but did not send a response (missing fetch event listener?)"
253+ }
254+ TerminationReason :: CpuTimeLimit => {
255+ "Worker exceeded CPU time limit (100ms)"
256+ }
257+ TerminationReason :: WallClockTimeout => {
258+ "Worker exceeded wall-clock time limit (60s)"
259+ }
260+ TerminationReason :: MemoryLimit => {
261+ "Worker exceeded memory limit (128MB)"
262+ }
263+ TerminationReason :: Exception => {
264+ "Worker threw an uncaught exception"
265+ }
266+ TerminationReason :: InitializationError => {
267+ "Worker failed to initialize"
268+ }
269+ TerminationReason :: Terminated => {
270+ "Worker was terminated"
271+ }
272+ } ;
273+
274+ HttpResponse :: build ( actix_web:: http:: StatusCode :: from_u16 ( status) . unwrap ( ) )
275+ . content_type ( "text/plain" )
276+ . insert_header ( ( "X-Termination-Reason" , format ! ( "{:?}" , reason) ) )
277+ . body ( body)
198278 }
199279 } ;
200280
@@ -231,26 +311,65 @@ async fn main() -> std::io::Result<()> {
231311 }
232312
233313 let db_url = std:: env:: var ( "DATABASE_URL" ) . expect ( "DATABASE_URL must be set" ) ;
234- let pool = PgPoolOptions :: new ( )
235- . max_connections ( 4 )
236- . connect ( & db_url)
237- . await
238- . expect ( "Failed to connect to Postgres" ) ;
239-
240- // Check postgres connection
241- sqlx:: query ( "SELECT 1" )
242- . fetch_one ( & pool)
243- . await
244- . expect ( "Failed to query Postgres" ) ;
245- debug ! ( "connected to Postgres" ) ;
246-
247- // Check NATS connection
248- let nats_client = openworkers_runner:: nats:: nats_connect ( ) . await ;
249- nats_client
250- . publish ( "boot" , "0" . into ( ) )
251- . await
252- . expect ( "Failed to connect to NATS" ) ;
253- debug ! ( "connected to NATS" ) ;
314+
315+ // Retry database connection with exponential backoff
316+ let mut retry_count = 0 ;
317+ let max_retries = 5 ;
318+ let pool = loop {
319+ match PgPoolOptions :: new ( )
320+ . max_connections ( 20 ) // Increased from 4
321+ . acquire_timeout ( Duration :: from_secs ( 5 ) )
322+ . connect ( & db_url)
323+ . await
324+ {
325+ Ok ( pool) => {
326+ // Test the connection
327+ match sqlx:: query ( "SELECT 1" ) . fetch_one ( & pool) . await {
328+ Ok ( _) => {
329+ debug ! ( "connected to Postgres" ) ;
330+ break pool;
331+ }
332+ Err ( e) => {
333+ error ! ( "Database connection test failed: {}" , e) ;
334+ if retry_count >= max_retries {
335+ panic ! ( "Failed to connect to database after {} retries" , max_retries) ;
336+ }
337+ }
338+ }
339+ }
340+ Err ( e) => {
341+ retry_count += 1 ;
342+ if retry_count > max_retries {
343+ panic ! ( "Failed to connect to database after {} retries: {}" , max_retries, e) ;
344+ }
345+ let wait_time = Duration :: from_secs ( 2u64 . pow ( retry_count. min ( 5 ) ) ) ;
346+ warn ! ( "Database connection attempt {} failed: {}. Retrying in {:?}..." ,
347+ retry_count, e, wait_time) ;
348+ tokio:: time:: sleep ( wait_time) . await ;
349+ }
350+ }
351+ } ;
352+
353+ // Connect to NATS with retries
354+ let mut retry_count = 0 ;
355+ loop {
356+ match openworkers_runner:: nats:: nats_connect ( ) . await . publish ( "boot" , "0" . into ( ) ) . await {
357+ Ok ( _) => {
358+ debug ! ( "connected to NATS" ) ;
359+ break ;
360+ }
361+ Err ( e) => {
362+ retry_count += 1 ;
363+ if retry_count > max_retries {
364+ panic ! ( "Failed to connect to NATS after {} retries: {}" , max_retries, e) ;
365+ }
366+ let wait_time = Duration :: from_secs ( 2u64 . pow ( retry_count. min ( 5 ) ) ) ;
367+ warn ! ( "NATS connection attempt {} failed: {}. Retrying in {:?}..." ,
368+ retry_count, e, wait_time) ;
369+ tokio:: time:: sleep ( wait_time) . await ;
370+ }
371+ }
372+ }
254373
255374 // Start global log publisher
256375 let log_tx = openworkers_runner:: log:: start_log_publisher ( ) ;
0 commit comments