Skip to content

Update @snoop_inference for Julia 1.12 #419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
name = "SnoopCompile"
uuid = "aa65fe97-06da-5843-b5b1-d5d13cad87d2"
version = "3.2.0"
author = ["Tim Holy <[email protected]>", "Shuhei Kadowaki <[email protected]>"]
version = "3.1.2"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
FlameGraphs = "08572546-2f56-4bcf-ba4e-bab62c3a3f89"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Expand All @@ -28,6 +29,7 @@ SCPyPlotExt = "PyPlot"

[compat]
AbstractTrees = "0.4"
CodeTracking = "1.3.9"
Cthulhu = "2"
FlameGraphs = "1"
InteractiveUtils = "1"
Expand All @@ -45,7 +47,7 @@ Serialization = "1"
SnoopCompileCore = "3"
Test = "1"
YAML = "0.4"
julia = "1.10"
julia = "1.12"

[extras]
Cthulhu = "f68482b8-f384-11e8-15f7-abe071a5a75f"
Expand Down
4 changes: 2 additions & 2 deletions SnoopCompileCore/Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name = "SnoopCompileCore"
uuid = "e2b509da-e806-4183-be48-004708413034"
author = ["Tim Holy <[email protected]>"]
version = "3.0.0"
version = "3.1.0"

[deps]
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

[compat]
julia = "1"
julia = "1.12"
2 changes: 1 addition & 1 deletion SnoopCompileCore/src/SnoopCompileCore.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module SnoopCompileCore

using Core: MethodInstance, CodeInfo
using Core: MethodInstance, CodeInstance, CodeInfo

include("snoop_inference.jl")
include("snoop_invalidations.jl")
Expand Down
232 changes: 133 additions & 99 deletions SnoopCompileCore/src/snoop_inference.jl
Original file line number Diff line number Diff line change
@@ -1,100 +1,26 @@
export @snoop_inference

struct InferenceTiming
mi_info::Core.Compiler.Timings.InferenceFrameInfo
inclusive_time::Float64
exclusive_time::Float64
end
"""
inclusive(frame)

Return the time spent inferring `frame` and its callees.
"""
inclusive(it::InferenceTiming) = it.inclusive_time
"""
exclusive(frame)

Return the time spent inferring `frame`, not including the time needed for any of its callees.
"""
exclusive(it::InferenceTiming) = it.exclusive_time

struct InferenceTimingNode
mi_timing::InferenceTiming
start_time::Float64
children::Vector{InferenceTimingNode}
bt
parent::InferenceTimingNode

# Root constructor
InferenceTimingNode(mi_timing::InferenceTiming, start_time, @nospecialize(bt)) =
new(mi_timing, start_time, InferenceTimingNode[], bt)
# Child constructor
function InferenceTimingNode(mi_timing::InferenceTiming, start_time, @nospecialize(bt), parent::InferenceTimingNode)
child = new(mi_timing, start_time, InferenceTimingNode[], bt, parent)
push!(parent.children, child)
return child
end
end
inclusive(node::InferenceTimingNode) = inclusive(node.mi_timing)
exclusive(node::InferenceTimingNode) = exclusive(node.mi_timing)
InferenceTiming(node::InferenceTimingNode) = node.mi_timing

function InferenceTimingNode(t::Core.Compiler.Timings.Timing)
ttree = timingtree(t)
it, start_time, ttree_children = ttree::Tuple{InferenceTiming, Float64, Vector{Any}}
root = InferenceTimingNode(it, start_time, t.bt)
addchildren!(root, t, ttree_children)
return root
const snoop_inference_lock = ReentrantLock()
const newly_inferred = CodeInstance[]

function start_tracking()
islocked(snoop_inference_lock) && error("already tracking inference (cannot nest `@snoop_inference` blocks)")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(this really should be a Base feature, since implementing it indirectly in packages is not very robust)

Suggested change
islocked(snoop_inference_lock) && error("already tracking inference (cannot nest `@snoop_inference` blocks)")
snoop_inference_lock.reentrancy_cnt == 1 || throw(ConcurrencyViolationError("already tracking inference (cannot nest `@snoop_inference` blocks)"))

lock(snoop_inference_lock)
empty!(newly_inferred)
ccall(:jl_set_newly_inferred, Cvoid, (Any,), newly_inferred)
# return ccall(:jl_log_dispatch_backtrace, Any, (Cint,), 1)
empty!(Core.Compiler.dispatch_backtrace)
Core.Compiler.collect_dispatch_backtrace[] = true
return nothing
end

# Compute inclusive times and store as a temporary tree.
# To allow InferenceTimingNode to be both bidirectional and immutable, we need to create parent node before the child nodes.
# However, each node stores its inclusive time, which can only be computed efficiently from the leaves up (children before parents).
# This performs the inclusive-time computation, storing the result as a "temporary tree" that can be used during
# InferenceTimingNode creation (see `addchildren!`).
function timingtree(t::Core.Compiler.Timings.Timing)
time, start_time = t.time/10^9, t.start_time/10^9
incl_time = time
tchildren = []
for child in t.children
tchild = timingtree(child)
push!(tchildren, tchild)
incl_time += inclusive(tchild[1])
end
return (InferenceTiming(t.mi_info, incl_time, time), start_time, tchildren)
end

function addchildren!(parent::InferenceTimingNode, t::Core.Compiler.Timings.Timing, ttrees)
for (child, ttree) in zip(t.children, ttrees)
it, start_time, ttree_children = ttree::Tuple{InferenceTiming, Float64, Vector{Any}}
node = InferenceTimingNode(it, start_time, child.bt, parent)
addchildren!(node, child, ttree_children)
end
end

function start_deep_timing()
Core.Compiler.Timings.reset_timings()
Core.Compiler.__set_measure_typeinf(true)
end
function stop_deep_timing()
Core.Compiler.__set_measure_typeinf(false)
Core.Compiler.Timings.close_current_timer()
end

function finish_snoop_inference()
return InferenceTimingNode(Core.Compiler.Timings._timings[1])
end

function _snoop_inference(cmd::Expr)
return quote
start_deep_timing()
try
$(esc(cmd))
finally
stop_deep_timing()
end
finish_snoop_inference()
end
function stop_tracking()
@assert islocked(snoop_inference_lock)
Copy link
Collaborator

@vtjnash vtjnash Apr 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: more concisely and correctly

Suggested change
@assert islocked(snoop_inference_lock)
Base.assert_havelock(snoop_inference_lock)

ccall(:jl_set_newly_inferred, Cvoid, (Any,), nothing)
unlock(snoop_inference_lock)
# ccall(:jl_log_dispatch_backtrace, Any, (Cint,), 0)
Core.Compiler.collect_dispatch_backtrace[] = false
return nothing
end

"""
Expand Down Expand Up @@ -134,11 +60,119 @@ julia> tinf = @snoop_inference begin
```
"""
macro snoop_inference(cmd)
return _snoop_inference(cmd)
return esc(quote
local backtrace_log = $(SnoopCompileCore.start_tracking)()
try
$cmd
finally
$(SnoopCompileCore.stop_tracking)()
end
$timingtree($(SnoopCompileCore.newly_inferred), copy(Core.Compiler.dispatch_backtrace))
end)
end

# These are okay to come at the top-level because we're only measuring inference, and
# inference results will be cached in a `.ji` file.
precompile(start_deep_timing, ())
precompile(stop_deep_timing, ())
precompile(finish_snoop_inference, ())
struct InferenceTimingNode
ci::CodeInstance
children::Vector{InferenceTimingNode}
bt
parent::InferenceTimingNode

function InferenceTimingNode(ci::CodeInstance, st) # for creating the root
return new(ci, InferenceTimingNode[], st)
end
function InferenceTimingNode(ci::CodeInstance, st, parent)
child = new(ci, InferenceTimingNode[], st, parent)
push!(parent.children, child)
return child
end
end

function timingtree(cis, backtraces)
root = InferenceTimingNode(Core.Compiler.Timings.ROOTmi.cache, nothing)
# the cis are added in the order children-before-parents, we need to be able to reverse that
# We index on MethodInstance rather than CodeInstance, because constprop can result in a distinct
# (and uncached) CodeInstance for the same MethodInstance
miidx = Dict([methodinstance(ci) for ci in cis] .=> eachindex(cis))
backedges = [Int[] for _ in eachindex(cis)]
for (i, ci) in pairs(cis)
for e in ci.edges
e isa CodeInstance || continue
eidx = get(miidx, methodinstance(e), nothing)
if eidx !== nothing
push!(backedges[eidx], i)
end
end
end
# backtraces = Dict{MethodInstance,Any}(backtrace_log[i] => backtrace_log[i+1] for i in 1:2:length(backtrace_log))
addchildren!(root, cis, backedges, miidx, backtraces)
return root
end

function addchildren!(parent::InferenceTimingNode, handled::Set{CodeInstance}, miidx)
for ci in parent.ci.edges
ci isa CodeInstance || continue
haskey(miidx, methodinstance(ci)) || continue
ci ∈ handled && continue
child = InferenceTimingNode(ci, nothing, parent)
push!(handled, ci)
addchildren!(child, handled, miidx)
end
return parent
end

function addchildren!(parent::InferenceTimingNode, cis, backedges, miidx, backtraces)
handled = Set{CodeInstance}()
for (i, ci) in pairs(cis)
ci ∈ handled && continue
# Follow the backedges to the root
j = i
be = ci
while true
found = false
for k in backedges[j]
be = cis[k]
if be ∉ handled
j = k
found = true
break
end
end
found || break
end
be ∈ handled && continue
# bt1, bt2 = get(backtraces, Core.Compiler.get_ci_mi(be), (nothing, nothing))
# child = InferenceTimingNode(be, make_stacktrace(bt1, bt2), parent)
child = InferenceTimingNode(be, get(backtraces, be, nothing), parent)
push!(handled, be)
addchildren!(child, handled, miidx)
end
return parent
end

methodinstance(ci::CodeInstance) = Core.Compiler.get_ci_mi(ci)

# make_stacktrace(bt1::Vector{Ptr{Cvoid}}, bt2::Vector{Any}) = Base._reformat_bt(bt1, bt2)
# make_stacktrace(::Nothing, ::Nothing) = nothing

## API functions

"""
inclusive(ci::InferenceTimingNode)

Return the time spent inferring `ci` and its callees.
"""
inclusive(ci::CodeInstance) = reinterpret(Float16, ci.time_infer_total)
inclusive(node::InferenceTimingNode) = inclusive(node.ci)

"""
exclusive(ci::InferenceTimingNode)

Return the time spent inferring `ci`, not including the time needed for any of its callees.
"""
exclusive(ci::CodeInstance) = reinterpret(Float16, ci.time_infer_self)
exclusive(node::InferenceTimingNode) = exclusive(node.ci)


precompile(start_tracking, ())
precompile(stop_tracking, ())
precompile(timingtree, (Vector{CodeInstance},))
8 changes: 5 additions & 3 deletions src/SnoopCompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ module SnoopCompile
using SnoopCompileCore
# More exports are defined below in the conditional loading sections

using Core: MethodInstance, CodeInfo
using Core: MethodInstance, CodeInstance, CodeInfo
using InteractiveUtils
using Serialization
using Printf
using OrderedCollections
using CodeTracking
import YAML # For @snoop_llvm

using Base: specializations
Expand Down Expand Up @@ -72,6 +73,7 @@ end

include("parcel_snoop_inference.jl")
include("inference_demos.jl")
export InferenceTiming, InferenceTimingNode
export exclusive, inclusive, flamegraph, flatten, accumulate_by_source, collect_for, runtime_inferencetime, staleinstances
export InferenceTrigger, inference_triggers, callerinstance, callingframe, skiphigherorder, trigger_tree, suggest, isignorable
export report_callee, report_caller, report_callees
Expand All @@ -82,8 +84,8 @@ export read_snoop_llvm
include("invalidations.jl")
export uinvalidated, invalidation_trees, filtermod, findcaller

include("invalidation_and_inference.jl")
export precompile_blockers
# include("invalidation_and_inference.jl")
# export precompile_blockers

# Write
include("write.jl")
Expand Down
Loading
Loading