Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions examples/softkiller/README.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
# JetReconstruction.jl Examples - SoftKiller

Here are simple examples of SoftKiller that read from two HepMC3 files and
then run the SoftKiller algorithm.

## `softkiller_plots.jl`

This example can be run as:

```sh
julia --project softkiller_plots.jl --maxevents=100 --grid-size=0.4 --algorithm=Kt --pileup-file=../../test/data/sk_example_PU.hepmc.zst --hard-file=../../test/data/sk_example_HS.hepmc.zst
julia --project softkiller_plots.jl --pileup-maxevents=100 --eventno=4 --grid-size=0.4 --algorithm=Kt ../../test/data/sk_example_HS.hepmc.zst ../../test/data/sk_example_PU.hepmc.zst
```

This is a simple example of SoftKiller that reads from two HepMC3 files
and displays plots of clustering without SoftKiller and with SoftKiller.
and displays plots of clustering with SoftKiller and without SoftKiller.

## `softkiller_runtime.jl`

This example can be run as:

```sh
julia --project softkiller_runtime.jl --maxevents=100 --grid-size=0.4 --algorithm=Kt --pileup-file=../../test/data/sk_example_PU.hepmc.zst --hard-file=../../test/data/sk_example_HS.hepmc.zst
julia --project softkiller_runtime.jl --pileup-maxevents=100 --eventno=4 --grid-size=0.4 --algorithm=Kt ../../test/data/sk_example_HS.hepmc.zst ../../test/data/sk_example_PU.hepmc.zst
```

This is a simple example of SoftKiller that reads from two HepMC3 files
and displays the runtime of the SoftKiller algorithm.
For both scripts the number of pileup events to add to the hard scatter event
can be controlled with `--pileup-maxevents` and `--pileup-skip`; and the hard
scatter event to work with can be changed with `--eventno`.
46 changes: 17 additions & 29 deletions examples/softkiller/softkiller_plots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,20 @@ include(joinpath(@__DIR__, "..", "parse-options.jl"))
function parse_command_line(args)
s = ArgParseSettings(autofix_names = true)
@add_arg_table! s begin
"--maxevents", "-n"
help = "Maximum number of events to read. -1 to read all events from the file."
"--pileup-maxevents", "-n"
help = "Maximum number of events to read. -1 to read all events from the pileup events file."
arg_type = Int
default = -1

"--skip", "-s"
help = "Number of events to skip at beginning of the file."
"--pileup-skip", "-s"
help = "Number of events to skip in the pileup events file."
arg_type = Int
default = 0

"--ptmin"
help = "Minimum p_t for final inclusive jets (energy unit is the same as the input clusters, usually GeV)"
arg_type = Float64
default = 5.0

"--exclusive-dcut"
help = "Return all exclusive jets where further merging would have d>d_cut"
arg_type = Float64

"--exclusive-njets"
help = "Return all exclusive jets once clusterisation has produced n jets"
"--eventno", "-e"
help = "Event number to process from the hard scatter events file. If not specified, the first event will be processed."
arg_type = Int
default = 1

"--distance", "-R"
help = "Distance parameter for jet merging"
Expand All @@ -53,19 +45,16 @@ function parse_command_line(args)
arg_type = RecoStrategy.Strategy
default = RecoStrategy.Best

"--dump"
help = "Write list of reconstructed jets to a JSON formatted file"

"--grid-size"
help = "Size of Rectangular grid"
arg_type = Float64
default = 0.4

"--hard-file"
"hard-file"
help = "HepMC3 event file in HepMC3 to read."
required = true

"--pileup-file"
"pileup-file"
help = "HepMC3 event file in HepMC3 to read."
required = true
end
Expand Down Expand Up @@ -150,6 +139,7 @@ function plot_set_up(y::Vector{Float64}, phi::Vector{Float64}, pt::Vector{Float6
["Pileup", "Hard Event", "Jet"], "Legend")

save(plot_title * ".png", fig)
@info "Plot '$(plot_title)' saved"
end

function main()
Expand All @@ -166,21 +156,18 @@ function main()

# Reading pileup and hard event files
events = read_final_state_particles(args[:pileup_file], jet_type;
maxevents = args[:maxevents],
skipevents = args[:skip])
maxevents = args[:pileup_maxevents],
skipevents = args[:pileup_skip])

h_events = read_final_state_particles(args[:hard_file], jet_type;
maxevents = args[:maxevents],
skipevents = args[:skip])
maxevents = args[:eventno],
skipevents = args[:eventno])

# Set up SoftKiller grid and rapidity range
rapmax = 5.0
grid_size = args[:grid_size]
soft_killer = SoftKiller(rapmax, grid_size)

algorithm = args[:algorithm]
p = args[:power]

# Ensure algorithm and power are consistent
if isnothing(args[:algorithm]) && isnothing(args[:power])
@warn "Neither algorithm nor power specified, defaulting to AntiKt"
Expand All @@ -205,8 +192,8 @@ function main()
end
end

# Fill hard event jets (from second event in h_events)
for pseudo_jet in h_events[2]
# Fill hard event jets (only the first event will be read)
for pseudo_jet in h_events[1]
push!(hard_only, pseudo_jet)
push!(all_jets_sk, pseudo_jet)
push!(all_jets, pseudo_jet)
Expand Down Expand Up @@ -248,6 +235,7 @@ function main()
pt_threshold = 0.00
# Apply SoftKiller to all_jets_sk (hard + pileup)
reduced_event, pt_threshold = softkiller(soft_killer, all_jets_sk)
@info "SoftKiller applied: $(length(reduced_event)) clusters remaining from $(length(all_jets_sk)), pt threshold = $pt_threshold"

# Plot all PseudoJets after SoftKiller, before clustering
y_all_sk, phi_all_sk, pt_all_sk,
Expand Down
69 changes: 35 additions & 34 deletions examples/softkiller/softkiller_runtime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,20 @@ include(joinpath(@__DIR__, "..", "parse-options.jl"))
function parse_command_line(args)
s = ArgParseSettings(autofix_names = true)
@add_arg_table! s begin
"--maxevents", "-n"
help = "Maximum number of events to read. -1 to read all events from the file."
"--pileup-maxevents", "-n"
help = "Maximum number of pileup events to read. -1 to read all events from the pileup events file."
arg_type = Int
default = -1

"--skip", "-s"
help = "Number of events to skip at beginning of the file."
"--pileup-skip", "-s"
help = "Number of events to skip in the pileup events file."
arg_type = Int
default = 0

"--ptmin"
help = "Minimum p_t for final inclusive jets (energy unit is the same as the input clusters, usually GeV)"
arg_type = Float64
default = 5.0

"--exclusive-dcut"
help = "Return all exclusive jets where further merging would have d>d_cut"
arg_type = Float64

"--exclusive-njets"
help = "Return all exclusive jets once clusterisation has produced n jets"
"--eventno", "-e"
help = "Event number to process from the hard scatter events file. If not specified, the first event will be processed."
arg_type = Int
default = 1

"--distance", "-R"
help = "Distance parameter for jet merging"
Expand All @@ -52,20 +44,17 @@ function parse_command_line(args)
arg_type = RecoStrategy.Strategy
default = RecoStrategy.Best

"--dump"
help = "Write list of reconstructed jets to a JSON formatted file"

"--grid-size"
help = "Size of Rectangular grid"
arg_type = Float64
default = 0.4

"--hard-file"
help = "HepMC3 event file in HepMC3 to read."
"hard-file"
help = "HepMC3 event file containing hard scatter events."
required = true

"--pileup-file"
help = "HepMC3 event file in HepMC3 to read."
"pileup-file"
help = "HepMC3 event file containing pileup events."
required = true
end
return parse_args(args, s; as_symbols = true)
Expand All @@ -87,12 +76,12 @@ function main()

# Reading pileup and hard event files
events = read_final_state_particles(args[:pileup_file], jet_type;
maxevents = args[:maxevents],
skipevents = args[:skip])
maxevents = args[:pileup_maxevents],
skipevents = args[:pileup_skip])

h_events = read_final_state_particles(args[:hard_file], jet_type;
maxevents = args[:maxevents],
skipevents = args[:skip])
maxevents = args[:eventno],
skipevents = args[:eventno])

# Set up SoftKiller grid and rapidity range
rapmax = 5.0
Expand All @@ -110,18 +99,30 @@ function main()

# Fill pileup jets
for event in events
for pseudo_jet in event
push!(all_jets_sk, pseudo_jet)
end
append!(all_jets_sk, event)
end

# Fill hard event jets (from second event in h_events)
for pseudo_jet in h_events[2]
push!(all_jets_sk, pseudo_jet)
end
# Fill hard event jets from first read event in h_events (actually should be only one event!)
append!(all_jets_sk, h_events[1])

# Apply SoftKiller to all_jets_sk (hard + pileup)
softkiller(soft_killer, all_jets_sk)
reduced_event, pt_threshold = softkiller(soft_killer, all_jets_sk)
@info "SoftKiller applied: $(length(reduced_event)) clusters remaining from $(length(all_jets_sk)), pt threshold = $pt_threshold"

# Workaround for cluster_hist_indedx not being correct after SoftKiller filtering
Copy link
Contributor

@mattleblanc mattleblanc Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is for a future PR, but I wanted to say that I find this procedure to be very cumbersome, in-part because it has to happen between applying filters to the input constituents and any jet finding that is done later.

SK isn't the only filter people will want to study: CS and CHS are also obvious things to look at, and grooming jets will have a similar problem.

Is there some smarter way to handle the cluster_hist_index assignment when filtering particles? In FastJet I guess that this is part of the ClusterSequence and so it's handled there instead of by the user ...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both in Fastjet and JetReconstruction the cluster history indices are part of the jet classes. The problem here is that our PseudoJets are Julia's struct and are immutable so their fields (index) can't be updated later so we have to create a new jet object with different index. Prior to #146, we had mutable struct which can be modified but had a number of other annoyances (reasoning about code with constants , less performant, more awkward C-bindings)

I guess as a minimum we could add a function taking a vector of jets after filter and returning vector of jets with normalized indices or maybe doing this inplace

Copy link
Member Author

@graeme-a-stewart graeme-a-stewart Aug 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello @mattleblanc

Your point is very well taken. We started to put effort into ensuring that this would be correct, but there are many cases to cover and lots of different paths to failure!

I propose that the reconstruction algorithms themselves take responsibility for checking and fixing things if a collection is passed that has mis-set the cluster_hist_index. This should not cost much at runtime and it makes things much safer and prevents the code from bombing if there's a filtering/selection path that didn't fix-up the cluster_hist_index itself.

See #200.

AFAICR Fastjet is using mutable structures, which is inherently more dangerous for the code, but allows these kinds of fix-ups on the fly. But we have ways to make sure that this is correct in Julia as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that having the clustering algorithms themselves handle this makes the most sense, even if it costs a little bit. In this case, at least we can ensure it's done as optimally as possible, instead of leaving the job up to the user.

Mutable structs seem undesirable for the reasons you both mention...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also makes things easier in the case of SoftKiller as it can just pass back the current list of jets and not worry about having to apply these fixes - this would make #191 moot.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could consider writing custom https://juliaobjects.github.io/Accessors.jl/dev/examples/custom_optics/ if it makes everyone's life easier while keeping strict immutable

fixed_reduced_event = PseudoJet[]
sizehint!(fixed_reduced_event, length(reduced_event))
for i in eachindex(reduced_event)
push!(fixed_reduced_event,
PseudoJet(reduced_event[i].px, reduced_event[i].py, reduced_event[i].pz,
reduced_event[i].E, i,
reduced_event[i]._pt2, reduced_event[i]._inv_pt2,
reduced_event[i]._rap, reduced_event[i]._phi))
end
cs = jet_reconstruct(fixed_reduced_event; algorithm = args[:algorithm],
R = args[:distance], p = args[:power],
strategy = args[:strategy])
@info "Reconstructed softkiller filtered clusters with algorithm $(args[:algorithm]), radius $(args[:distance]) and strategy $(args[:strategy])"
end

main()
7 changes: 7 additions & 0 deletions examples/softkiller/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#! /bin/sh

# Plain softkiller example
julia --project softkiller_runtime.jl --pileup-maxevents=100 --eventno=4 --grid-size=0.4 --algorithm=Kt ../../test/data/sk_example_HS.hepmc.zst ../../test/data/sk_example_PU.hepmc.zst

# Softkiller with graphics
julia --project softkiller_plots.jl --pileup-maxevents=100 --eventno=4 --grid-size=0.4 --algorithm=Kt ../../test/data/sk_example_HS.hepmc.zst ../../test/data/sk_example_PU.hepmc.zst
Loading