Skip to content

Commit 613cf95

Browse files
authored
fix: rpc bind failure fatal (#14330)
Report a proper error when we're unable to initialize the RPC server.
Signed-off-by: Rudi Grinberg <me@rgrinberg.com>
1 parent fe129fe commit 613cf95

7 files changed

Lines changed: 80 additions & 9 deletions

File tree

src/dune_rpc_impl/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
source
1010
memo
1111
dune_util
12+
dune_trace
1213
fiber
1314
stdune
1415
unix)

src/dune_rpc_impl/server.ml

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ module Run = struct
3636
; root : string
3737
; where : Dune_rpc.Where.t
3838
; server : Csexp_rpc.Server.t Lazy.t
39-
; server_ivar : Csexp_rpc.Server.t Fiber.Ivar.t
39+
; startup_ivar : (Csexp_rpc.Server.t, Exn_with_backtrace.t) result Fiber.Ivar.t
4040
; registry : [ `Add | `Skip ]
4141
}
4242

@@ -61,8 +61,18 @@ module Run = struct
6161
in
6262
let run () =
6363
let open Fiber.O in
64-
let server = Lazy.force t.server in
65-
let* () = Fiber.Ivar.fill t.server_ivar server in
64+
let* server =
65+
match Exn_with_backtrace.try_with (fun () -> Lazy.force t.server) with
66+
| Ok server ->
67+
let+ () = Fiber.Ivar.fill t.startup_ivar (Ok server) in
68+
server
69+
| Error exn ->
70+
let () =
71+
Dune_trace.emit Rpc (fun () -> Dune_trace.Event.Rpc.startup_failure exn)
72+
in
73+
let* () = Fiber.Ivar.fill t.startup_ivar (Error exn) in
74+
Exn_with_backtrace.reraise exn
75+
in
6676
Fiber.fork_and_join_unit
6777
(fun () ->
6878
let* sessions = Csexp_rpc.Server.serve server in
@@ -196,15 +206,18 @@ type 'build_arg t =
196206
}
197207

198208
let ready (t : _ t) =
199-
let* server = Fiber.Ivar.read t.config.server_ivar in
200-
Csexp_rpc.Server.ready server
209+
Fiber.Ivar.read t.config.startup_ivar
210+
>>= function
211+
| Ok server -> Csexp_rpc.Server.ready server
212+
| Error _exn -> raise Dune_util.Report_error.Already_reported
201213
;;
202214

203215
let stop (t : _ t) =
204-
let* server = Fiber.Ivar.peek t.config.server_ivar in
205-
match server with
216+
Fiber.Ivar.peek t.config.startup_ivar
217+
>>= function
206218
| None -> Fiber.return ()
207-
| Some server -> Csexp_rpc.Server.stop server
219+
| Some (Error _) -> Fiber.return ()
220+
| Some (Ok server) -> Csexp_rpc.Server.stop server
208221
;;
209222

210223
let get_current_diagnostic_errors () =
@@ -494,7 +507,7 @@ let create ~lock_timeout ~registry ~root =
494507
; where
495508
; server
496509
; registry
497-
; server_ivar = Fiber.Ivar.create ()
510+
; startup_ivar = Fiber.Ivar.create ()
498511
}
499512
in
500513
let res = { config; pending_jobs; clients = Clients.empty } in

src/dune_trace/dune_trace.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ module Event : sig
188188
-> t
189189

190190
val shutdown : id:int -> stage -> t
191+
val startup_failure : Exn_with_backtrace.t -> t
191192
val close : id:int -> t
192193
val dropped_write_client_disconnect : Exn.t -> t
193194
end

src/dune_trace/event.ml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,12 @@ module Rpc = struct
507507

508508
let shutdown ~id stage = server ~id ~name:"shutdown" stage
509509

510+
let startup_failure exn =
511+
let now = Time.now () in
512+
let args = [ "error", Arg.dyn (Exn_with_backtrace.to_dyn exn) ] in
513+
Event.instant ~args ~name:"startup-failure" now Rpc
514+
;;
515+
510516
let close ~id =
511517
let now = Time.now () in
512518
let args = [ "id", Arg.int id ] in

test/blackbox-tests/setup-script.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ summarize_rpc_trace () {
186186
and .args.stage == "stop"
187187
and (.args | has("error"))
188188
then "accept stop error"
189+
elif .name == "startup-failure"
190+
then "startup failure"
189191
elif .name == "request" and .args.meth == "build"
190192
then "build \(.args.stage)"
191193
elif .name == "shutdown"

test/blackbox-tests/test-cases/watching/dune

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
(= "macosx" %{ocaml-config:system}))
1919
(applies_to path-pwd))
2020

21+
(cram
22+
(applies_to rpc-bind-failure)
23+
(enabled_if
24+
(<> %{ocaml-config:system} win)))
25+
2126
;; Disabled due to timeouts
2227

2328
(cram
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
Startup RPC bind failures should be reported immediately and terminate dune.
2+
3+
$ export DUNE_TRACE=rpc
4+
5+
$ echo "(lang dune 3.23)" > dune-project
6+
7+
$ cat > dune <<EOF
8+
> (rule
9+
> (target x)
10+
> (action (write-file %{target} ok)))
11+
> EOF
12+
13+
Poison the parent of the Unix-domain RPC socket so that binding
14+
_build/.rpc/dune fails during startup.
15+
16+
$ mkdir -p _build
17+
$ : > _build/.rpc
18+
19+
$ OUTPUT=$(mktemp)
20+
$ ((dune build --passive-watch-mode >"$OUTPUT" 2>&1) || (echo exit $? >>"$OUTPUT")) &
21+
$ DUNE_PID=$!
22+
23+
$ wait_for_dune_exit_with_timeout
24+
25+
$ grep "Uncaught RPC Error" "$OUTPUT"
26+
Uncaught RPC Error
27+
28+
$ grep "bind" "$OUTPUT"
29+
Unix.Unix_error(Unix.ENOTDIR, "bind", "")
30+
Raised by primitive operation at Rpc__Csexp_rpc.Socket.U.bind in file
31+
Called from Rpc__Csexp_rpc.Socket.bind in file "src/rpc/csexp_rpc.ml"
32+
Error: bind(): Not a directory
33+
34+
$ grep '^exit 1$' "$OUTPUT"
35+
exit 1
36+
37+
$ dune trace cat | jq -c '
38+
> select(.cat == "rpc" and .name == "startup-failure")
39+
> | { name, error: .args.error.exn }
40+
> '
41+
{"name":"startup-failure","error":"Unix.Unix_error(Unix.ENOTDIR, \"bind\", \"\")"}
42+
43+
$ ! with_timeout_quiet dune rpc ping >/dev/null 2>&1

0 commit comments

Comments
 (0)