Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ jobs:
os:
- ubuntu-latest
- macos-15
- windows-latest
exclude:
- os: windows-latest
ocaml-compiler: 5.0
- os: windows-latest
ocaml-compiler: 5.1
- os: windows-latest
ocaml-compiler: 5.2
- os: windows-latest
ocaml-compiler: 5.4

runs-on: ${{ matrix.os }}

Expand All @@ -37,7 +47,6 @@ jobs:
opam-depext: false
ocaml-compiler: ${{ matrix.ocaml-compiler }}


- run: opam install . --deps-only --with-test

- run: opam exec -- dune build
Expand All @@ -47,3 +56,4 @@ jobs:
- run: |
opam install . --with-dev-setup --with-test --deps-only
opam exec -- dune build @fmt
if: runner.os != 'Windows'
2 changes: 1 addition & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
(maintenance_intent "(latest)")
(depends
(ocaml (>= "5.0.0~"))
hdr_histogram
(hdr_histogram (>= "0.0.5"))
(cmdliner (>= 2.0.0))
(trace-fuchsia (>= 0.11))
(trace (>= 0.11))
Expand Down
90 changes: 75 additions & 15 deletions lib/olly_common/launch.ml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
external is_process_alive : int -> bool = "olly_is_process_alive"
external get_process_id : int -> int = "olly_get_process_id"

let lost_events ring_id num =
Printf.eprintf "[ring_id=%d] Lost %d events\n%!" ring_id num
Expand All @@ -16,8 +17,12 @@ type exec_config = Attach of string * int | Execute of string list
(* Raised by exec_process to indicate various unrecoverable failures. *)
exception Fail of string

(* Function to kill a spawned process. Only the spawner can terminate
the child. *)
type kill = unit -> unit

let exec_process (config : runtime_events_config) (args : string list) :
subprocess =
subprocess * kill =
if not (List.length args > 0) then
raise (Fail (Printf.sprintf "no executable provided for exec_process"));

Expand Down Expand Up @@ -70,25 +75,73 @@ let exec_process (config : runtime_events_config) (args : string list) :
base_env;
]
in
let child_pid =
let child_handle =
try
Unix.create_process_env executable_filename (Array.of_list args) env
Unix.stdin Unix.stdout Unix.stderr
with Unix.Unix_error (Unix.ENOENT, _, _) ->
raise
(Fail (Printf.sprintf "executable %s not found" executable_filename))
in
Unix.sleepf 0.1;
let child_pid = get_process_id child_handle in
(* OCaml's Unix module on Windows uses the value returned by
create_process (a HANDLE) as its process identifier, both Unix.kill
and Unix.waitpid expect that value, NOT the real OS PID from
GetProcessId. On Unix the handle and PID coincide.

We poll waitpid with WNOHANG rather than blocking indefinitely: if
the kill didn't take for any reason, hanging the test suite for
hours is worse than reporting and moving on. *)
let kill () =
(try Unix.kill child_handle Sys.sigkill
with Unix.Unix_error _ | Invalid_argument _ -> ());
let deadline = Unix.gettimeofday () +. 5.0 in
let rec wait () =
if Unix.gettimeofday () >= deadline then ()
else
match Unix.waitpid [ Unix.WNOHANG ] child_handle with
| 0, _ ->
Unix.sleepf 0.02;
wait ()
| _, _ -> ()
| exception Unix.Unix_error _ -> ()
in
wait ()
in
(* Poll until we can create a cursor for the child's ring buffer.
The OCaml runtime creates the .events file then initializes it,
so we retry create_cursor rather than just checking file existence.
Under load (e.g. parallel dune builds) process creation can be slow. *)
let timeout = 5.0 in
let poll_interval = 0.05 in
let deadline = Unix.gettimeofday () +. timeout in
let cursor =
try Runtime_events.create_cursor (Some (dir, child_pid))
with Failure str ->
(* Provide some context for which directory was passed to create_cursor *)
failwith (str ^ " Directory: " ^ dir)
let last_exn = ref None in
let result = ref None in
while !result = None && Unix.gettimeofday () < deadline do
try result := Some (Runtime_events.create_cursor (Some (dir, child_pid)))
with Failure _ as exn ->
last_exn := Some exn;
Unix.sleepf poll_interval
done;
match !result with
| Some c -> c
| None ->
(* Clean up the child process before failing — otherwise it becomes
an orphan. On Windows, orphan processes hold .events files open
and prevent dune from cleaning up its temp directory. *)
kill ();
let msg =
match !last_exn with
| Some (Failure str) -> str ^ " Directory: " ^ dir
| _ -> "Timed out waiting for runtime events. Directory: " ^ dir
in
failwith msg
in
let alive () =
match Unix.waitpid [ Unix.WNOHANG ] child_pid with
match Unix.waitpid [ Unix.WNOHANG ] child_handle with
| 0, _ -> true
| p, _ when p = child_pid -> false
| p, _ when p = child_handle -> false
| _, _ -> assert false
| exception Unix.Unix_error (Unix.EINTR, _, _) -> true
and close () =
Expand All @@ -101,10 +154,10 @@ let exec_process (config : runtime_events_config) (args : string list) :
let ring_file =
Filename.concat dir (string_of_int child_pid ^ ".events")
in
Unix.unlink ring_file
try Unix.unlink ring_file with Unix.Unix_error _ -> ()
end
in
{ alive; cursor; close; pid = child_pid }
({ alive; cursor; close; pid = child_pid }, kill)

let attach_process (dir : string) (pid : int) : subprocess =
(* Check the target process exists before attempting to attach *)
Expand Down Expand Up @@ -135,7 +188,7 @@ let attach_process (dir : string) (pid : int) : subprocess =

let launch_process config (exec_args : exec_config) : subprocess =
match exec_args with
| Execute argsl -> exec_process config argsl
| Execute argsl -> fst (exec_process config argsl)
| Attach (dir, pid) -> attach_process dir pid

let interrupted = Atomic.make false
Expand All @@ -151,13 +204,20 @@ let collect_events poll_sleep ~on_poll child callbacks =
(* Read from the child process *)
while child.alive () && not (Atomic.get interrupted) do
on_poll child.pid;
Runtime_events.read_poll child.cursor callbacks None |> ignore;
(try Runtime_events.read_poll child.cursor callbacks None |> ignore
with Failure _ ->
(* The child may have exited between the alive check and read_poll,
leaving the ring buffer in a partially-written state. *)
());
if poll_sleep > 0.0 then
try Unix.sleepf poll_sleep
with Unix.Unix_error (Unix.EINTR, _, _) -> ()
done;
(* Do one more poll in case there are any remaining events we've missed *)
Runtime_events.read_poll child.cursor callbacks None |> ignore)
(* Do one more poll in case there are any remaining events we've missed.
After the child exits, the ring buffer may be in an inconsistent state
so we tolerate read failures here. *)
try Runtime_events.read_poll child.cursor callbacks None |> ignore
with Failure _ -> ())

type 'r acceptor_fn = int -> Runtime_events.Timestamp.t -> 'r

Expand Down
13 changes: 13 additions & 0 deletions lib/olly_common/process_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,16 @@ CAMLprim value olly_is_process_alive(value v_pid) {
CAMLreturn(Val_false);
#endif
}

#ifdef _WIN32
CAMLprim value olly_get_process_id(value v_handle) {
HANDLE h = (HANDLE)(intnat)Long_val(v_handle);
DWORD pid = GetProcessId(h);
return Val_long((intnat)pid);
}
#else
CAMLprim value olly_get_process_id(value v_pid) {
/* On Unix, create_process_env returns the actual PID */
return v_pid;
}
#endif
3 changes: 1 addition & 2 deletions runtime_events_tools.opam
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ bug-reports: "https://github.com/tarides/runtime_events_tools/issues"
depends: [
"dune" {>= "3.18"}
"ocaml" {>= "5.0.0~"}
"hdr_histogram"
"hdr_histogram" {>= "0.0.5"}
"cmdliner" {>= "2.0.0"}
"trace-fuchsia" {>= "0.11"}
"trace" {>= "0.11"}
Expand All @@ -35,4 +35,3 @@ build: [
]
dev-repo: "git+https://github.com/tarides/runtime_events_tools.git"
x-maintenance-intent: ["(latest)"]
available: os != "win32"
1 change: 0 additions & 1 deletion runtime_events_tools.opam.template
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
available: os != "win32"
1 change: 0 additions & 1 deletion runtime_events_tools_bare.opam
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,3 @@ build: [
]
dev-repo: "git+https://github.com/tarides/runtime_events_tools.git"
x-maintenance-intent: ["(latest)"]
available: os != "win32"
1 change: 0 additions & 1 deletion runtime_events_tools_bare.opam.template
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
available: os != "win32"
31 changes: 17 additions & 14 deletions test/test_launch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ let process_launch_failure () =
(fun () -> ignore (Launch.exec_process config [ "missing.exe" ]));

match_raises "non-executable should not launch"
(* File for exec_process is not an executable *)
(function Unix.Unix_error (Unix.EACCES, _, _) -> true | _exn -> false)
(* File for exec_process is not an executable.
Unix returns EACCES, Windows returns ENOEXEC. *)
(function
| Unix.Unix_error (Unix.EACCES, _, _) -> true
| Unix.Unix_error (Unix.ENOEXEC, _, _) -> true
| _exn -> false)
(fun () -> ignore (Launch.exec_process config [ "./run_endlessly.ml" ]));

match_raises "empty executable string should not launch"
Expand All @@ -23,18 +27,17 @@ let process_launch () =
Alcotest.(check bool)
"process should launch" true
(try
let a = Launch.exec_process config [ "./run_endlessly.exe" ] in
try
(* Sending signal Zero to kill checks the process exists for Unix. *)
Unix.kill a.pid 0;
true
with Unix.Unix_error (Unix.ESRCH, _, _) -> false
with
(* Any exceptions indicate a failure to launch *)
| Unix.Unix_error (Unix.ENOENT, _, _) -> false
| _exn ->
Printf.printf "%s" (Printexc.to_string _exn);
false)
let a, kill = Launch.exec_process config [ "./run_endlessly.exe" ] in
let launched = a.alive () in
(* Clean up the child process so it doesn't leak.
On Windows, open files cannot be deleted, so the orphan process
would prevent dune from cleaning up its temp directory. *)
kill ();
a.close ();
launched
with _exn ->
Printf.eprintf "%s" (Printexc.to_string _exn);
false)

let () =
let open Alcotest in
Expand Down
Loading