Add a Correlation-Based Covariance Transformation Option to the DE Crossover Operation Transformations (UB-SSDC-Lab#59)

GrantHecht · LaSalle · dlasalle37 · web-flow · commit d1bac6b5b6fb · 2025-07-07T18:23:53.000-04:00
Implements UncorrelatedCovarianceTransformation as a new DE crossover operation transformation, which constructs the transformation based on the N best uncorrelated candidates. 

---------

Co-authored-by: LaSalle &lt;dmlasall@buffalo.edu&gt;
Co-authored-by: Dylan &lt;dlasalle37@gmail.com&gt;
diff --git a/docs/src/lib/public.md b/docs/src/lib/public.md
@@ -132,6 +132,8 @@ SelfBinomialCrossoverParameters
 SelfBinomialCrossoverParameters(; dist=::Any, transform=::Any)
 CovarianceTransformation
 CovarianceTransformation(::Any,::Any,::Any)
+UncorrelatedCovarianceTransformation
+UncorrelatedCovarianceTransformation(::Any,::Any,::Any)
 ```
 
 ### Monotonic Basin Hopping
diff --git a/scripts/de_testing.jl b/scripts/de_testing.jl
@@ -42,7 +42,7 @@ function rastrigin(x; A=10)
 end
 
 # Setup Problem
-N = 2
+N = 7
 ss = ContinuousRectangularSearchSpace([-100.0 for i in 1:N], [100.0 for i in 1:N])
 prob = GlobalOptimization.OptimizationProblem(rastrigin, ss)
 
@@ -54,7 +54,9 @@ mutation_strategy = SelfMutationParameters(
 )
 crossover_strategy = SelfBinomialCrossoverParameters(;
     dist=Uniform(0.0, 1.0),
-    #transform = GlobalOptimization.CovarianceTransformation(0.1, 0.5, N),
+    #transform = GlobalOptimization.NoTransformation()
+    transform = GlobalOptimization.UncorrelatedCovarianceTransformation(0.5, 0.8, N; ps = 1.0),
+    #transform = GlobalOptimization.CovarianceTransformation(0.1, 0.5, N)
 )
 
 de = DE(
@@ -65,10 +67,10 @@ de = DE(
     max_stall_iterations=100,
     mutation_params=mutation_strategy,
     crossover_params=crossover_strategy,
-    show_trace=Val(true),
+    show_trace=Val(false),
 )
 
-res = optimize!(de)
+res = @btime optimize!(de)
 #iters_per_solve = map(i->optimize!(deepcopy(de)).iters, 1:100);
 
 # bb_res = bboptimize(
diff --git a/src/DE/crossover.jl b/src/DE/crossover.jl
@@ -36,6 +36,14 @@ Subtypes of this abstract type should define the following methods:
 """
 abstract type AbstractCrossoverTransformation end
 
+"""
+    RotationMatrixBasedCrossoverTransformation
+
+An abstract type representing a transformation applied to a candidate prior to applying
+the crossover operator which uses an orthogonal rotation matrix.
+"""
+abstract type RotationMatrixBasedCrossoverTransformation <: AbstractCrossoverTransformation end
+
 """
     NoTransformation
 
@@ -62,7 +70,7 @@ DOI: [10.1016/j.asoc.2014.01.038](https://doi.org/10.1016/j.asoc.2014.01.038).
 - `ct::Vector{Float64}`: The transformed candidate.
 - `mt::Vector{Float64}`: The transformed mutant.
 """
-struct CovarianceTransformation <: AbstractCrossoverTransformation
+struct CovarianceTransformation <: RotationMatrixBasedCrossoverTransformation
     ps::Float64
     pb::Float64
     B::Matrix{Float64}
@@ -113,15 +121,110 @@ struct CovarianceTransformation <: AbstractCrossoverTransformation
     end
 end
 
+"""
+    UncorrelatedCovarianceTransformation{T<:AbstractCrossoverTransformation}
+
+A transformation for performing crossover in the eigen-space of the covariance matrix of the
+best candidates in the population which are also not too closely correlated.
+
+This is an implementation of the method proposed by Wang and Li in "Differential Evolution
+Based on Covariance Matrix Learning and Bimodal Distribution Parameter Setting, " 2014,
+DOI: [10.1016/j.asoc.2014.01.038](https://doi.org/10.1016/j.asoc.2014.01.038).
+
+Correlation alteration based on "Covariance Matrix Learning Differential Evolution Algorithm Based on Correlation"
+DOI: https://doi.org/10.4236/ijis.2021.111002
+
+# Fields
+- `ps::Float64`: The proportion of candidates to consider in the covariance matrix. That is,
+    for a population size of `N` with `M` candidates remaining after the correlation check, the covariance matrix is calculated using the
+    `clamp(ceil(ps * M), 2, M)` best candidates.
+- `pb::Float64`: The probability of applying the transformation.
+- `a::Float64`: The correlation threshold for which two candidates are considered 'too close' to both be used in the covariance matrix construction.
+- `B::Matrix{Float64}`: The real part of the eigenvectors of the covariance matrix.
+- `ct::Vector{Float64}`: The transformed candidate.
+- `mt::Vector{Float64}`: The transformed mutant.
+- `idxs::Vector{UInt16}`: A vector of indexes for the population
+- `cidxs::Vector{UInt16}`: A vector of unique `correlated` indexes for the population set for removal
+"""
+struct UncorrelatedCovarianceTransformation <: RotationMatrixBasedCrossoverTransformation
+    ps::Float64
+    pb::Float64
+    a::Float64
+    B::Matrix{Float64}
+
+    # Preallocate storage for transformed candidate and mutant
+    ct::Vector{Float64}
+    mt::Vector{Float64}
+
+    # Preallocate storage for calculating transformation
+    idxs::Vector{UInt16}
+
+    cidxs::Vector{UInt16}
+
+    @doc """
+        UncorrelatedCovarianceTransformation{T<:AbstractCrossoverTransformation}
+
+    A transformation for performing crossover in the eigen-space of the covariance matrix of the
+    best candidates in the population which are also not too closely correlated.
+
+    This is an implementation of the method proposed by Yuan and Feng in "Covariance Matrix
+    Learning Differential Evolution Algorithm Based on Correlation"
+    DOI: https://doi.org/10.4236/ijis.2021.111002
+
+    # Arguments
+    - `pb::Float64`: The probability of applying the transformation.
+    - `a::Float64`: The correlation threshold for two candidates being 'too close'.
+    - `num_dims::Int`: The number of dimensions in the search space.
+
+    # Keyword Arguments:
+    - `ps::Float64`: The proportion of candidates to consider in the covariance matrix.
+        Defaults to 1.0 (i.e., all uncorrelated candidates are considered)
+
+    # Returns
+    - `UncorrelatedCovarianceTransformation`: A `UncorrelatedCovarianceTransformation` object with the specified
+        parameters.
+
+    # Examples
+    ```julia-repl
+    julia> using GlobalOptimization
+    julia> transformation = UncorrelatedCovarianceTransformation(0.5, .95, 10; ps = 1.0)
+    UncorrelatedCovarianceTransformation(1.0, 0.5, 0.95, [1.0630691323565e-311 1.0630691151907e-311 … 1.063069230316e-311 1.063069119645e-311; 1.0630691158705e-311 1.063069115333e-311 … 1.063069172704e-311 1.0630692904614e-311; … ; 1.063069115428e-311 1.063069114246e-311 … 1.063069124886e-311 1.0630694190924e-311; 1.0630691153804e-311 1.0630691141986e-311 … 1.0630691348624e-311 1.063069428614e-311], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], UInt16[], UInt16[])
+    ```
+    """
+    function UncorrelatedCovarianceTransformation(pb, a, num_dims; ps = 1.0)
+        if ps <= 0.0 || ps > 1.0
+            throw(ArgumentError("ps must be in the range (0, 1]."))
+        end
+        if pb <= 0.0 || pb > 1.0
+            throw(ArgumentError("pb must be in the range (0, 1]."))
+        end
+        if a <= 0.0 || a > 1.0
+            throw(ArgumentError("a must be in the range (0, 1]."))
+        end
+        B = Matrix{Float64}(undef, num_dims, num_dims)
+        return new(ps, pb, a, B, zeros(num_dims), zeros(num_dims), Vector{UInt16}(undef, 0), Vector{UInt16}(undef, 0))
+    end
+
+end
+
 initialize!(transformation::NoTransformation, population_size) = nothing
-function initialize!(transformation::CovarianceTransformation, population_size)
+function initialize!(transformation::RotationMatrixBasedCrossoverTransformation, population_size)
+    resize!(transformation.idxs, population_size)
+    transformation.idxs .= 1:population_size
+    return nothing
+end
+
+function initialize!(transformation::UncorrelatedCovarianceTransformation, population_size)
     resize!(transformation.idxs, population_size)
+    resize!(transformation.cidxs, population_size)
     transformation.idxs .= 1:population_size
+    empty!(transformation.cidxs)
     return nothing
 end
 
 update_transformation!(transformation::NoTransformation, population) = nothing
 function update_transformation!(transformation::CovarianceTransformation, population)
+
     # Get number of candidates to consider in the covariance matrix
     n = clamp(ceil(Int, transformation.ps * length(population)), 2, length(population))
 
@@ -138,9 +241,76 @@ function update_transformation!(transformation::CovarianceTransformation, popula
 
     return nothing
 end
+function update_transformation!(transformation::UncorrelatedCovarianceTransformation, population)
+    # get correlation for each pair of vectors in population
+    cor_mat = cor(stack(population.current_generation.candidates))
+
+    # store population_size
+    pop_size = length(population)
+
+    if all_correlated(cor_mat, transformation.a)
+        # If all correlated, set transform to identity and return
+        #let's just set the transformation to identity as it doesn't
+        # really make sense to compute the covariance matrix transformation given this
+        # transformation is for specifically avoiding using correlated candidates when
+        # computing the transformation.
+        fill_identity!(transformation.B)
+        return nothing
+    else
+        # lower triangular so that we only have unique pairs (excluding diagonal, since all elements are 1.0)
+        tril!(cor_mat, -1)
+
+        #  find points where two candidates are strongly correlated
+
+        @inbounds for j in axes(cor_mat,2)
+            for i in j+1:size(cor_mat,1)
+                abs_x = abs(cor_mat[i, j])
+                if abs_x >= transformation.a
+                    #Base.push!(transformation.pairs, SVector(i, j))
+                    if !in(i, transformation.cidxs)
+                        Base.push!(transformation.cidxs, i)
+                    end
+                end
+            end
+        end
+
+
+        # if we're removing all but one idx, set transformation to identity and return
+        if length(transformation.cidxs) >= length(transformation.idxs) - 1
+            fill_identity!(transformation.B)
+            return nothing
+        end
+
+        # sort transformation.idxs in order of best candidate to worst candidate
+        sortperm!(transformation.idxs, population.current_generation.candidates_fitness)
+
+        # remove candidates (note setdiff preserves order)
+        setdiff!(transformation.idxs, transformation.cidxs)
+
+        # get number of candidates to use for covariance based on remaining candidates
+        n = clamp(ceil(Int, transformation.ps * length(transformation.idxs)), 2, length(transformation.idxs))
+
+        # Get indices of n best remaining candidates
+        idxs = view(transformation.idxs, 1:n)
+
+        # Calculate the covariance matrix for n best candidates
+        C = cov(view(population.current_generation.candidates, idxs))
+
+        # Compute eigen decomposition
+        E = eigen!(C)
+        transformation.B .= real.(E.vectors)
+
+        # Reset preallocated storage
+        empty!(transformation.cidxs)
+        resize!(transformation.idxs, pop_size)
+        transformation.idxs .= 1:pop_size
+    end
+
+    return nothing
+end
 
 to_transformed(transformation::NoTransformation, c, m) = c, m, false
-function to_transformed(transformation::CovarianceTransformation, c, m)
+function to_transformed(transformation::RotationMatrixBasedCrossoverTransformation, c, m)
     if rand() < transformation.pb
         mul!(transformation.ct, transpose(transformation.B), c)
         mul!(transformation.mt, transpose(transformation.B), m)
@@ -151,7 +321,7 @@ function to_transformed(transformation::CovarianceTransformation, c, m)
 end
 
 from_transformed!(transformation::NoTransformation, mt, m) = nothing
-function from_transformed!(transformation::CovarianceTransformation, mt, m)
+function from_transformed!(transformation::RotationMatrixBasedCrossoverTransformation, mt, m)
     mul!(m, transformation.B, mt)
     return nothing
 end
diff --git a/src/GlobalOptimization.jl b/src/GlobalOptimization.jl
@@ -4,12 +4,12 @@ using ADTypes: AbstractADType
 using ChunkSplitters: chunks, ChunkSplitters, RoundRobin
 using Distributions: Cauchy, Laplace, MixtureModel
 using LatinHypercubeSampling: scaleLHC, LHCoptim
-using LinearAlgebra: dot, eigen!, mul!
+using LinearAlgebra: dot, eigen!, mul!, tril!
 using LineSearches: HagerZhang, InitialStatic
 using Polyester: @batch
 using Printf: format, Format
 using StaticArrays: SA, SVector
-using Statistics: cov, mean, median!, std
+using Statistics: cov, mean, median!, std, cor
 using Random: rand, rand!, shuffle!, AbstractRNG, GLOBAL_RNG
 using UnPack: @unpack
 
@@ -18,6 +18,7 @@ using NonlinearSolve: NonlinearSolve
 using Optim: Optim
 
 # Base
+include("utils.jl")
 include("enums.jl")
 include("tracing.jl")
 include("Options.jl")
@@ -64,7 +65,7 @@ export Rand1, Rand2, Best1, Best2, CurrentToBest1, CurrentToBest2
 export CurrentToRand1, CurrentToRand2, RandToBest1, RandToBest2, Unified
 export MutationParameters, SelfMutationParameters
 export SelfBinomialCrossoverParameters, BinomialCrossoverParameters
-export CovarianceTransformation
+export CovarianceTransformation, UncorrelatedCovarianceTransformation
 
 export SingleHopper, MCH
 export MBHStaticDistribution, MBHAdaptiveDistribution
diff --git a/src/utils.jl b/src/utils.jl
@@ -0,0 +1,43 @@
+
+# Utility functions for GlobalOptimization.jl
+
+"""
+    all_correlated(cor, tol)
+
+Checks if the absolute value of the lower-triangular part of the correlation matrix `cor`
+are above the correlation tolerance `tol` and returns `true` if so, otherwise, returns
+`false`.
+
+# Arguments:
+- `cor::AbstractMatrix`: The correlation matrix
+- `tol::AbstractFloat`: The correlation tolerance. Elements of `cor` with an absolute value
+    greater than `tol` are assumed to be correlated.
+
+# Returns:
+- `Bool`: `true` if all elements are correlated and `false` otherwise
+"""
+function all_correlated(cor, tol)
+    @inbounds for col in first(axes(cor, 2), size(cor, 2) - 1)
+        for row in last(axes(cor, 1), size(cor, 1) - col)
+            abs_x = abs(cor[row, col])
+            if abs_x < tol
+                return false
+            end
+        end
+    end
+    return true
+end
+
+"""
+    fill_identity!(mat)
+
+Fills the `mat` in-place with the identity matrix.
+"""
+function fill_identity!(mat)
+    @inbounds for j in axes(mat, 2)
+        for i in axes(mat, 1)
+            mat[i,j] = ifelse(i == j, 1.0, 0.0)
+        end
+    end
+    return nothing
+end
diff --git a/test/base.jl b/test/base.jl
@@ -746,3 +746,16 @@ end
     end
     @test all(threads_used)
 end
+
+@testset showtiming = true "Utils" begin
+    # Test general utilities from utils.jl
+
+    mat = ones(3,3)
+    @test GlobalOptimization.all_correlated(mat, 0.8) == true
+    mat[3,1] = 0.1
+    @test GlobalOptimization.all_correlated(mat, 0.8) == false
+
+    GlobalOptimization.fill_identity!(mat)
+    @test isapprox(mat, [1. 0 0; 0 1. 0; 0 0 1.], atol=1e-8)
+
+end
diff --git a/test/de.jl b/test/de.jl