-
Notifications
You must be signed in to change notification settings - Fork 103
Snapshot using perf ctlfd #320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
open Core | ||
|
||
module Command = struct | ||
type t = string | ||
|
||
let snapshot = "snapshot" | ||
end | ||
|
||
let ack_msg = "ack\n\000" | ||
let ack_timeout = Time_ns.Span.of_int_sec 1 | ||
|
||
type t = | ||
{ mutable ctl_rx : Core_unix.File_descr.t | ||
; ctl_tx : Core_unix.File_descr.t | ||
; ack_rx : Core_unix.File_descr.t | ||
; mutable ack_tx : Core_unix.File_descr.t | ||
; ack_buf : Bytes.t | ||
} | ||
|
||
let create () = | ||
let ctl_rx, ctl_tx = Core_unix.pipe ~close_on_exec:false () in | ||
let ack_rx, ack_tx = Core_unix.pipe ~close_on_exec:false () in | ||
Core_unix.set_close_on_exec ctl_tx; | ||
Core_unix.set_close_on_exec ack_rx; | ||
{ ctl_rx; ctl_tx; ack_rx; ack_tx; ack_buf = Bytes.make (String.length ack_msg) '\000' } | ||
;; | ||
|
||
let close_perf_side_fds t = | ||
Core_unix.close ~restart:true t.ctl_rx; | ||
t.ctl_rx <- Core_unix.File_descr.of_int (-1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: let's make these |
||
Core_unix.close ~restart:true t.ack_tx; | ||
t.ack_tx <- Core_unix.File_descr.of_int (-1) | ||
;; | ||
|
||
let control_opt ({ ctl_rx; ack_tx; _ } as t) = | ||
let p = Core_unix.File_descr.to_int in | ||
( [ [%string "--control=fd:%{p ctl_rx#Int},%{p ack_tx#Int}"] ] | ||
, fun () -> close_perf_side_fds t ) | ||
;; | ||
|
||
let block_read_ack t = | ||
let total_bytes_read = ref 0 in | ||
while | ||
match | ||
Core_unix.select | ||
~restart:true | ||
~read:[ t.ack_rx ] | ||
~write:[] | ||
~except:[] | ||
~timeout:(`After ack_timeout) | ||
() | ||
with | ||
| { read = []; _ } -> failwith "Perf didn't ack snapshot within timeout" | ||
| { read = [ _fd ]; _ } -> | ||
let bytes_read = | ||
Core_unix.read ~restart:true t.ack_rx ~buf:t.ack_buf ~pos:!total_bytes_read | ||
in | ||
if bytes_read = 0 then failwith "Perf unexpectedly closed ack fd"; | ||
total_bytes_read := !total_bytes_read + bytes_read; | ||
!total_bytes_read < Bytes.length t.ack_buf | ||
| _ -> failwith "unreachable" | ||
do | ||
() | ||
done; | ||
if not | ||
(String.equal | ||
ack_msg | ||
(Bytes.unsafe_to_string ~no_mutation_while_string_reachable:t.ack_buf)) | ||
then failwith "Receive malformed ack from perf"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will be very hard to debug if it ever raises. Let's turn this into a |
||
Bytes.fill t.ack_buf ~pos:0 ~len:(Bytes.length t.ack_buf) '\000' | ||
;; | ||
|
||
let dispatch_and_block_for_ack t (command : Command.t) = | ||
(* Don't do an async write because we want to write immediately; we don't really | ||
care if we block for a bit *) | ||
if Core_unix.single_write_substring ~restart:true t.ctl_tx ~buf:command | ||
<> String.length command | ||
then failwith "Unexpected partial write to perf ctlfd" | ||
else block_read_ack t | ||
;; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
open! Core | ||
|
||
type t | ||
|
||
val create : unit -> t | ||
|
||
(** Returns the additional arguments to `perf record` to use these as control fds, and a | ||
callback to invoke after the fork. *) | ||
val control_opt : t -> string list * (unit -> unit) | ||
|
||
module Command : sig | ||
type t | ||
|
||
val snapshot : t | ||
end | ||
|
||
val dispatch_and_block_for_ack : t -> Command.t -> unit |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,9 +89,43 @@ module Recording = struct | |
type t = { callgraph_mode : Callgraph_mode.t option } [@@deriving sexp] | ||
end | ||
|
||
module Snapshot_behavior = struct | ||
module At_exit = struct | ||
type t = | ||
| Sigint | ||
| Sigusr2 | ||
end | ||
|
||
module Function_call = struct | ||
type t = | ||
| Sigusr2 | ||
| Ctlfd of Perf_ctlfd.t | ||
end | ||
|
||
type t = | ||
| Never | ||
| At_exit of At_exit.t | ||
| Function_call of Function_call.t | ||
|
||
let opt t = | ||
let snapshot_opt = | ||
match t with | ||
| Never -> [] | ||
| At_exit Sigint -> [ "--snapshot=e" ] | ||
| Function_call (Ctlfd _ | Sigusr2) | At_exit Sigusr2 -> [ "--snapshot" ] | ||
in | ||
let control_opt, invoke_after_fork = | ||
match t with | ||
| Never | At_exit (Sigint | Sigusr2) | Function_call Sigusr2 -> [], Fn.id | ||
| Function_call (Ctlfd ctlfd) -> Perf_ctlfd.control_opt ctlfd | ||
in | ||
snapshot_opt @ control_opt, invoke_after_fork | ||
;; | ||
end | ||
|
||
type t = | ||
{ pid : Pid.t | ||
; when_to_snapshot : [ `at_exit of [ `sigint | `sigusr2 ] | `function_call | `never ] | ||
; snapshot_behavior : Snapshot_behavior.t | ||
} | ||
|
||
let perf_selector_of_trace_scope : Trace_scope.t -> string = function | ||
|
@@ -353,23 +387,23 @@ module Recording = struct | |
[] | ||
| None, Intel_processor_trace _ | None, Stacktrace_sampling _ -> [] | ||
in | ||
let when_to_snapshot = | ||
let snapshot_behavior : Snapshot_behavior.t = | ||
if full_execution | ||
then `never | ||
then Never | ||
else ( | ||
match when_to_snapshot with | ||
| Magic_trace_or_the_application_terminates -> | ||
if perf_supports_snapshot_on_exit then `at_exit `sigint else `at_exit `sigusr2 | ||
| Application_calls_a_function _ -> `function_call) | ||
if perf_supports_snapshot_on_exit then At_exit Sigint else At_exit Sigusr2 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use the ctrlfd here too? This logic is also used for triggering on ^C; it'd be nice to get away from the buggy signal handling where we can. |
||
| Application_calls_a_function _ -> | ||
Function_call | ||
(if Perf_capabilities.(do_intersect capabilities ctlfd) | ||
then Ctlfd (Perf_ctlfd.create ()) | ||
else Sigusr2)) | ||
in | ||
let snapshot_opt = | ||
let snapshot_opt, invoke_after_fork = | ||
match collection_mode with | ||
| Stacktrace_sampling _ -> [] | ||
| Intel_processor_trace _ -> | ||
(match when_to_snapshot with | ||
| `never -> [] | ||
| `at_exit `sigint -> [ "--snapshot=e" ] | ||
| `function_call | `at_exit `sigusr2 -> [ "--snapshot" ]) | ||
| Stacktrace_sampling _ -> [], Fn.id | ||
| Intel_processor_trace _ -> Snapshot_behavior.opt snapshot_behavior | ||
in | ||
let overwrite_opts = | ||
match collection_mode, full_execution with | ||
|
@@ -403,6 +437,7 @@ module Recording = struct | |
session, it doesn't also send SIGINT to the perf process, allowing us to send it a | ||
SIGUSR2 first to get it to capture a snapshot before exiting. *) | ||
Core_unix.setpgid ~of_:perf_pid ~to_:perf_pid; | ||
invoke_after_fork (); | ||
let%map () = Async.Clock_ns.after (Time_ns.Span.of_ms 500.0) in | ||
(* Check that the process hasn't failed after waiting, because there's no point pausing | ||
to do recording if we've already failed. *) | ||
|
@@ -412,35 +447,35 @@ module Recording = struct | |
| Some (_, exit) -> perf_exit_to_or_error exit | ||
| _ -> Ok () | ||
in | ||
( { pid = perf_pid; when_to_snapshot } | ||
( { pid = perf_pid; snapshot_behavior } | ||
, { Data.callgraph_mode = selected_callgraph_mode } ) | ||
;; | ||
|
||
let maybe_take_snapshot t ~source = | ||
let signal = | ||
match t.when_to_snapshot, source with | ||
let should_take_snapshot = | ||
match t.snapshot_behavior, source with | ||
(* [`never] only comes up in [-full-execution] mode. In that mode, perf always gives a | ||
complete trace; there's no snapshotting. *) | ||
| `never, _ -> None | ||
| Never, _ -> false | ||
(* Do not snapshot at the end of a program if the user has set up a trigger symbol. *) | ||
| `function_call, `ctrl_c -> None | ||
| Function_call _, `ctrl_c -> false | ||
(* This shouldn't happen unless there was a bug elsewhere. It would imply that a trigger | ||
symbol was hit when there is no trigger symbol configured. *) | ||
| `at_exit _, `function_call -> None | ||
| At_exit _, `function_call -> false | ||
(* Trigger symbol was hit, and we're configured to look for them. *) | ||
| `function_call, `function_call -> Some Signal.usr2 | ||
| Function_call _, `function_call -> true | ||
(* Ctrl-C was hit, and we're configured to look for that. *) | ||
| `at_exit signal, `ctrl_c -> | ||
(* The actual signal to use varies depending on whether or not the user's version of perf | ||
supports snapshot-at-exit. *) | ||
Some | ||
(match signal with | ||
| `sigint -> Signal.int | ||
| `sigusr2 -> Signal.usr2) | ||
| At_exit _, `ctrl_c -> true | ||
in | ||
match signal with | ||
| None -> () | ||
| Some signal -> Signal_unix.send_i signal (`Pid t.pid) | ||
if should_take_snapshot | ||
then ( | ||
match t.snapshot_behavior with | ||
| Never -> failwith "unreachable" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion: lift this block into a |
||
| At_exit Sigusr2 | Function_call Sigusr2 -> | ||
Signal_unix.send_i Signal.usr2 (`Pid t.pid) | ||
| At_exit Sigint -> Signal_unix.send_i Signal.int (`Pid t.pid) | ||
| Function_call (Ctlfd ctlfd) -> | ||
Perf_ctlfd.(dispatch_and_block_for_ack ctlfd Command.snapshot)) | ||
;; | ||
|
||
let finish_recording t = | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.