Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

work on RSP algorithm efficiency #32

Draft
wants to merge 52 commits into
base: lazy_compute_ops
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
4823207
work on algorithm efficiency
rafaqz Jan 4, 2025
b7881c0
fix tests
rafaqz Jan 13, 2025
2f680a1
work reduction with traits
rafaqz Jan 14, 2025
2839f44
more memory reductions
rafaqz Jan 14, 2025
3d2b56d
more memory reductions
rafaqz Jan 14, 2025
89aa9c0
updates
rafaqz Jan 16, 2025
9ad50fc
accurate allocation estimates
rafaqz Jan 20, 2025
f7bd5d7
fix tests
rafaqz Jan 20, 2025
214793c
fix docstring warning
rafaqz Jan 21, 2025
8abcaef
tests and allocs
rafaqz Jan 24, 2025
4186033
window testing
rafaqz Jan 28, 2025
ba234c3
tweaks
rafaqz Jan 29, 2025
5b26be2
fix doc
rafaqz Jan 29, 2025
1fa786a
fix type params
rafaqz Jan 29, 2025
759b34b
add basic windowed model init
rafaqz Jan 29, 2025
3109170
assesment tweaks
rafaqz Feb 3, 2025
305fd56
updates, windowing a litte broken
rafaqz Feb 4, 2025
9c5f7ad
update for batch assesments
rafaqz Feb 6, 2025
322b520
reorganise init
rafaqz Feb 6, 2025
ef16390
nwindows
rafaqz Jan 29, 2025
94347a2
refactor allocs to init
rafaqz Feb 7, 2025
0bd3392
updates
rafaqz Feb 11, 2025
c50e7c1
vector performance
rafaqz Feb 13, 2025
bf7091f
fix tests for vector targets
rafaqz Feb 13, 2025
fa7d952
tweaks
rafaqz Feb 13, 2025
c2ea5d1
remover Plots.jl
rafaqz Feb 14, 2025
6e61433
Merge remote-tracking branch 'refs/remotes/upstream/alg_efficiency' i…
rafaqz Feb 14, 2025
1dad5b7
optimise assessment
rafaqz Feb 14, 2025
a9479b2
bugfix assess
rafaqz Feb 14, 2025
935d729
reorganise
rafaqz Feb 17, 2025
8dd54f0
move tiles to windows.jl
rafaqz Feb 17, 2025
35fe615
format everything
rafaqz Feb 17, 2025
20bac98
reorganise and document
rafaqz Feb 17, 2025
da5768f
add reassess
rafaqz Feb 18, 2025
25bc155
cleanup
rafaqz Feb 18, 2025
f65161c
resorganise bugfix and test (re)assessments
rafaqz Feb 18, 2025
26eb243
assess performance fixes
rafaqz Feb 19, 2025
523de83
bugfixes for windows
rafaqz Feb 22, 2025
ebfc76c
bugfix and passing tests
rafaqz Feb 23, 2025
7fd91e3
use spawn for window threading
rafaqz Feb 23, 2025
fe07186
gc
rafaqz Feb 23, 2025
e296e6f
fix doc
rafaqz Feb 23, 2025
8e92af7
print wanting for empty stack output
rafaqz Feb 23, 2025
008ebea
add assessment warnings
rafaqz Mar 2, 2025
82210ec
no internal mosaic for batches
rafaqz Mar 2, 2025
ef0d821
remove nans from qualities
rafaqz Mar 2, 2025
6bc31fe
fix assessmentwarnings funcs
rafaqz Mar 3, 2025
a43ad2d
reorganise
rafaqz Mar 5, 2025
4d5244b
add and use prune_unconnected
rafaqz Mar 5, 2025
93ff283
loop over subgraphs
rafaqz Mar 9, 2025
42fecbb
tweaks
rafaqz Mar 9, 2025
9aa05cc
more than one target
rafaqz Mar 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ version = "0.3.0"

[deps]
ArnoldiMethod = "ec485272-7323-5ecc-a04f-4719b315124d"
CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c"
Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689"
SimpleWeightedGraphs = "47aef6b3-ad0c-573a-a1e2-d07658019622"
Expand All @@ -18,18 +19,21 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
ArnoldiMethod = "0.0.4, 0.4"
CommonSolve = "0.2"
ConstructionBase = "1.5.8"
DelimitedFiles = "1"
Graphs = "1"
LaTeXStrings = "1.1"
LinearSolve = "2.38.0"
Plots = "1.4"
ProgressLogging = "0.1"
Rasters = "0.13"
Rasters = "0.14"
SimpleWeightedGraphs = "1.1"
julia = "1.10"

[extras]
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"

[targets]
test = ["ArchGDAL"]
test = ["ArchGDAL","Plots"]
2 changes: 1 addition & 1 deletion examples/2_landmarks.jmd
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ plot(result.func_exp)


```julia
stored_problem = ConScape.StoredProblem(problem;
stored_problem = ConScape.BatchProblem(problem;
path=".", radius=20, overlap=30, threaded=true
)
ConScape.solve(stored_problem, rast)
Expand Down
36 changes: 27 additions & 9 deletions src/ConScape.jl
Original file line number Diff line number Diff line change
@@ -1,41 +1,59 @@
module ConScape

using SparseArrays, LinearAlgebra
using Graphs, Plots, SimpleWeightedGraphs, ProgressLogging, ArnoldiMethod
using Rasters
using ArnoldiMethod
using ConstructionBase
using Graphs
using LinearAlgebra
using LinearSolve
using ProgressLogging
using Rasters
using SimpleWeightedGraphs
using SparseArrays
using Rasters.DimensionalData

import CommonSolve
import CommonSolve: solve, init

# Old funcion-based interface
abstract type ConnectivityFunction <: Function end
abstract type DistanceFunction <: ConnectivityFunction end
abstract type ProximityFunction <: ConnectivityFunction end

struct least_cost_distance <: DistanceFunction end
struct expected_cost <: DistanceFunction end
struct free_energy_distance <: DistanceFunction end
struct least_cost_distance <: DistanceFunction end
struct expected_cost <: DistanceFunction end
struct free_energy_distance <: DistanceFunction end

struct survival_probability <: ProximityFunction end
struct power_mean_proximity <: ProximityFunction end
struct survival_probability <: ProximityFunction end
struct power_mean_proximity <: ProximityFunction end

# Need to define before loading files

"""
Solver

Abstract supertype for ConScape solvers.
"""
abstract type AbstractProblem end
abstract type Solver end

# Randomized shortest path algorithms
include("randomizedshortestpath.jl")
# Grid struct and methods
include("transformations.jl")
# Grid struct and methods
include("grid.jl")
# GridRSP (randomized shortest path) struct and methods
include("gridrsp.jl")
# IO
include("io.jl")
# Utilities
include("utils.jl")
# Problems
include("graph_measure.jl")
include("connectivity_measure.jl")
include("problem.jl")
include("solvers.jl")
include("tiles.jl")
include("windows.jl")
include("assessment.jl")

end
250 changes: 250 additions & 0 deletions src/assessment.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@

struct AssessmentWarnings
source_qualities_nan_found::Bool
target_qualities_nan_found::Bool
end

function Base.:(|)(aw1::AssessmentWarnings, aw2::AssessmentWarnings)
AssessmentWarnings(
aw1.source_qualities_nan_found | aw2.source_qualities_nan_found,
aw1.target_qualities_nan_found | aw2.target_qualities_nan_found,
)
end
function Base.:(&)(aw1::AssessmentWarnings, aw2::AssessmentWarnings)
AssessmentWarnings(
aw1.source_qualities_nan_found & aw2.source_qualities_nan_found,
aw1.target_qualities_nan_found & aw2.target_qualities_nan_found,
)
end
Base.any(aw::AssessmentWarnings) = aw.source_qualities_nan_found | aw.target_qualities_nan_found
Base.all(aw::AssessmentWarnings) = aw.source_qualities_nan_found & aw.target_qualities_nan_found

"""
ProblemAssessment

Abstract supertype for problem assessments.

These calculate the computation size of an
`AbstractWindowedProblem` for a specific `RasterStack`.
"""
abstract type ProblemAssessment end

Base.size(a::ProblemAssessment) = a.size

"""
WindowAssessment <: ProblemAssessment

Assessment of an AbstractWindowedProblem that holds
a `Problem`.

# Fields
- `size::Tuple{Int,Int}`: the size of the input and output RasterStack
- `shape::Tuple{Int,Int}`: the shape of the windowing
- `njobs::Int`: the number of problem runs required to finish the problem
- `grid_sizes::Vector{Tuple{Int,Int}}`: the sizes of each window
- `mask::Vector{Bool}`: Vector{Bool} where `true` values are jobs that need to be run.
- `indices::Vector{Int}`: the indices of `mask` that are `true`.
"""
@kwdef struct WindowAssessment <: ProblemAssessment
size::Tuple{Int,Int}
shape::Tuple{Int,Int}
njobs::Int
mask::Vector{Bool}
indices::Vector{Int}
warnings::AssessmentWarnings
grid_sizes::Vector{Tuple{Int,Int}}
end

"""
NestedAssessment <: ProblemAssessment

Assessment of a nested `AbstractWindowedProblem`,
that holds another `AbstractWindowedProblem`.

# Fields
- `size::Tuple{Int,Int}`: the size of the `RasterStack` input and output.
- `shape::Tuple{Int,Int}`: the shape of the windowing
- `njobs::Int`: the number of problem runs required to finish the problem
- `mask::Vector{Bool}`: Vector{Bool} where `true` values are jobs that need to be run.
- `indices::Vector{Int}`: the indices of `mask` that are `true`.
- `assessments::Vector{WindowAssessment}`: asessments at the next level down.
"""
@kwdef struct NestedAssessment <: ProblemAssessment
size::Tuple{Int,Int}
shape::Tuple{Int,Int}
njobs::Int
mask::Vector{Bool}
indices::Vector{Int}
warnings::AssessmentWarnings
assessments::Vector{WindowAssessment}
end

function Base.show(io::IO, mime::MIME"text/plain", a::ProblemAssessment)
println(io, "NestedAssessment")
println(io)
println(io, "Shape: $(a.shape)")
println(io, "Number of jobs: $(a.njobs)")
# Use SparseArrays nice matrix printing for the mask
println(io, "Job mask: ")
mask = sparse(reshape(a.mask, a.shape))
Base.print_array(io, mask)
if any(a.warnings)
show(io, mime, a.warnings)
end
end


"""
assess(p::AbstractProblem, rast::RasterStack)

Assess the computational requirements of problem
`p` for `RasterStack` `rastr`.

This can be used to indicate memory and time reequiremtents on a cluster.
"""
function assess end

function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack;
inner_target_bools=nothing,
target_ranges=_target_ranges(p, rast),
kw...
)
# Define the ranges of each window
window_ranges = _window_ranges(p, rast)

# Convert everything to Bool at the batch level so window assessments are fast
inner_targets = view(rast.target_qualities, target_ranges...)
warnings = AssessmentWarnings(
any(isnan, rast.qualities),
any(isnan, inner_targets),
)
inner_target_bools = isnothing(inner_target_bools) ? _isvalid.(inner_targets) : inner_target_bools
qualities = _isvalid.(rast.qualities)
target_qualities = falses(size(rast))
target_qualities[target_ranges...] .= inner_target_bools
bool_rast = RasterStack((; qualities, target_qualities), dims(rast))

# Calculate window sizes and allocations
grid_sizes = vec(_estimate_grid_sizes(p, bool_rast; window_ranges))

# Organise stats for each window into vectors
# Windows must have more than one source and more than one target
window_mask = map(s -> s[1] > 1 && s[2] > 1, grid_sizes)
non_empty_indices = eachindex(window_mask)[window_mask]

# Calculate global stats
njobs = count(window_mask)
shape = size(window_ranges)

WindowAssessment(size(rast), shape, njobs, window_mask, non_empty_indices, warnings, grid_sizes)
end
function assess(
p::AbstractWindowedProblem{<:AbstractWindowedProblem},
rast::AbstractRasterStack;
nthreads=Threads.nthreads(),
verbose=true,
kw...
)
# Calculate outer window ranges
window_ranges = _window_ranges(p, rast)
verbose && println("Assessing $(length(window_ranges)) jobs")

# Define a vector for all assessment data
assessments = Vector{WindowAssessment}(undef, length(window_ranges))
# Run assessments threaded as they can take a long time for large rasters
Threads.@threads for i in eachindex(vec(window_ranges))
rs = window_ranges[i]
verbose && println("Assessing batch: $i, $rs")
function empty_assesment(size)
verbose && println(" No targets found")
WindowAssessment(;
size,
shape=(0, 0),
njobs=0,
mask=Bool[],
indices=Int[],
warnings=AssessmentWarnings(false, false),
grid_sizes=Tuple{Int,Int}[],
)
end
# We only need qualities for the assessment
window_rast = rast[(:qualities, :target_qualities)][rs...]
target_ranges = _target_ranges(p, window_rast)
# Convert targets to bool as early as possible
inner_targets = view(window_rast.target_qualities, target_ranges...)
inner_target_bools = _isvalid.(inner_targets)
assessments[i] = if count(inner_target_bools) > 0
assess(p.problem, window_rast; inner_target_bools, target_ranges, nthreads, kw...)
else
empty_assesment(size(window_rast))
end
end
# Get mask and indices
mask = map(a -> any(a.mask), assessments)
non_empty_indices = eachindex(vec(mask))[mask]
# Calculate global stats
njobs = count(mask)
shape = size(window_ranges)
warnings = reduce(|, (a.warnings for a in assessments))
return NestedAssessment(size(rast), shape, njobs, mask, non_empty_indices, warnings, assessments)
end

"""
reassess(p::BatchProblem, a::NestedAssessment)

Re-asses an existing nested assesment of a [`BatchProblem`](@ref).

The returned `NestedAssessment` will exclude any batches that
already have a data folder (assumed to be successfully completed).
"""
function reassess(p::BatchProblem, a::NestedAssessment)
patch = _reassess(p, a)
a1 = ConstructionBase.setproperties(a, patch)
# Update nan_target_found from remaining indices
warnings = reduce(|, (a1.assessments[i].warnings for i in a1.indices);
init=AssessmentWarnings(false, false)
)
return ConstructionBase.setproperties(a1, (; warnings))
end
function reassess(p::BatchProblem, a::WindowAssessment)
patch = _reassess(p, a)
return ConstructionBase.setproperties(a, patch)
end

function _reassess(p, a)
# Paths for all batches
paths = batch_paths(p, size(a))
# Paths for non-empty batches
jobpaths = paths[a.indices]
# Find all the jobs that havent been saved (failed)
idxmask = .!(isdir.(jobpaths))
# Generate new arrays of indices and assessments for the remaining jobs
indices = a.indices[idxmask]
mask = fill(false, prod(a.shape))
mask[indices] .= true
njobs = length(indices)
return (; njobs, mask, indices)
end

# Accept ProblemAssessment as an argument to solve and init
# To used instead of keywords
solve(p::BatchProblem, rast::RasterStack, a::ProblemAssessment, i::Int...; verbose=false) =
solve!(init(p, rast, a), p, i...; verbose)

function init(p::BatchProblem{<:WindowedProblem}, rast::RasterStack, a::NestedAssessment, i::Int...; kw...)
batch_ranges = _window_ranges(p, rast)
grid_sizes = map(a.assessments) do a_w
a_w.grid_sizes
end
selected_window_indices = map(a.assessments) do a_w
a_w.indices
end
init(p, rast, i...;
batch_ranges,
batch_indices=a.indices,
grid_sizes,
selected_window_indices,
)
end
init(p::BatchProblem{<:Problem}, rast::RasterStack, a::WindowAssessment, i::Int...; kw...) =
init(p, rast, i...; batch_indices=a.indices)
Loading