|
#' NNS SD-based Clustering
#'
#' Clusters a set of variables by iteratively extracting Stochastic Dominance (SD)-efficient sets,
#' subject to a minimum cluster size.
#'
#' @param data A numeric matrix or data frame of variables to be clustered. Columns without
#' names are assigned default names \code{"V1"}, \code{"V2"}, ... so cluster membership can be reported.
#' @param degree Numeric options: (1, 2, 3). Degree of stochastic dominance test.
#' @param type Character, either \code{"continuous"} (default) or \code{"discrete"}; specifies the type of CDF.
#' @param min_cluster Integer; \code{1} (default). The minimum number of elements required for a valid cluster.
#' @param dendrogram Logical; \code{FALSE} (default). If \code{TRUE}, a dendrogram is produced based on a simple "distance" measure between clusters.
#'
#' @return
#' A list with the following components:
#' \itemize{
#'   \item \code{Clusters}: A named list of cluster memberships where each element is the set of variable names belonging to that cluster.
#'   \item \code{Dendrogram} (optional): If \code{dendrogram = TRUE}, an \code{hclust} object is also returned.
#' }
#'
#' @details
#' The function applies \code{\link{NNS.SD.efficient.set}} iteratively, peeling off the SD-efficient set at each step
#' if it meets or exceeds \code{min_cluster} in size, until no more subsets can be extracted or all variables are exhausted.
#' Variables in each SD-efficient set form a cluster, with any remaining variables aggregated into the final cluster if it meets
#' the \code{min_cluster} threshold. If \code{ncol(data) <= min_cluster}, all variables are returned
#' as a single cluster.
#'
#' @author Fred Viole, OVVO Financial Systems
#'
#' @references Viole, F. and Nawrocki, D. (2016) "LPM Density Functions for the Computation of the SD Efficient Set." Journal of Mathematical Finance, 6, 105-126. \doi{10.4236/jmf.2016.61012}.
#'
#' Viole, F. (2017) "A Note on Stochastic Dominance." \doi{10.2139/ssrn.3002675}
#'
#' @examples
#' \dontrun{
#' set.seed(123)
#' x <- rnorm(100)
#' y <- rnorm(100)
#' z <- rnorm(100)
#' A <- cbind(x, y, z)
#'
#' # Perform SD-based clustering (degree 1), requiring at least 2 elements per cluster
#' results <- NNS.SD.cluster(data = A, degree = 1, min_cluster = 2)
#' print(results$Clusters)
#'
#' # Produce a dendrogram as well
#' results_with_dendro <- NNS.SD.cluster(data = A, degree = 1, min_cluster = 2, dendrogram = TRUE)
#' plot(results_with_dendro$Dendrogram)
#' }
#'
#' @export


NNS.SD.cluster <- function(data, degree = 1, type = "continuous", min_cluster = 1, dendrogram = FALSE) {
  clusters <- list()
  iteration <- 1

  # Work on a matrix copy; clusters are tracked by column name.
  remaining_data <- as.matrix(data)

  # Guarantee usable column names so cluster membership can be reported.
  # (as.matrix() on an unnamed matrix yields NULL colnames.)
  if (is.null(colnames(remaining_data))) {
    colnames(remaining_data) <- paste0("V", seq_len(ncol(remaining_data)))
  }

  # Peel off SD-efficient sets while enough variables remain to split further.
  while (ncol(remaining_data) > min_cluster) {
    SD_set <- NNS.SD.efficient.set(remaining_data, degree = degree, type = type, status = FALSE)

    # No efficient subset could be extracted: stop peeling; leftovers are
    # collected into a final cluster below.
    if (length(SD_set) == 0) {
      break
    }

    # The SD-efficient set forms the next cluster.
    clusters[[paste0("Cluster_", iteration)]] <- SD_set

    # Drop the extracted variables; drop = FALSE keeps matrix structure
    # even when a single column remains.
    remaining_data <- remaining_data[, !(colnames(remaining_data) %in% SD_set), drop = FALSE]

    iteration <- iteration + 1

    # Too few variables left to split again: they form the final cluster.
    if (ncol(remaining_data) <= min_cluster) {
      if (ncol(remaining_data) > 0) {
        clusters[[paste0("Cluster_", iteration)]] <- colnames(remaining_data)
      }
      break
    }
  }

  # Collect any leftover variables (loop never entered, or stopped on an
  # empty SD set) into the final cluster. BUG FIX: the original condition
  # required ncol(remaining_data) > min_cluster, which silently discarded
  # all variables whenever ncol(data) <= min_cluster from the start.
  if (ncol(remaining_data) > 0 && !(paste0("Cluster_", iteration) %in% names(clusters))) {
    clusters[[paste0("Cluster_", iteration)]] <- colnames(remaining_data)
  }

  # If the final cluster is undersized, fold it into the preceding cluster
  # (only possible when more than one cluster exists).
  if (length(clusters) > 1) {
    final_cluster_name <- names(clusters)[length(clusters)]
    if (length(clusters[[final_cluster_name]]) < min_cluster) {
      previous_cluster_name <- names(clusters)[length(clusters) - 1]
      clusters[[previous_cluster_name]] <- c(clusters[[previous_cluster_name]], clusters[[final_cluster_name]])
      clusters[[final_cluster_name]] <- NULL
    }
  }

  if (dendrogram) {
    # Flatten memberships and label each variable with its cluster index.
    all_vars <- unlist(clusters)
    cluster_labels <- rep(seq_along(clusters), lengths(clusters))

    # Hierarchical clustering needs at least two variables.
    if (length(all_vars) < 2) {
      warning("Not enough variables for hierarchical clustering. Returning clusters only.")
      return(list("Clusters" = clusters, "Dendrogram" = NULL))
    }

    # Simple "distance": absolute difference between cluster indices, so
    # members of the same cluster are at distance 0.
    dist_matrix <- as.dist(outer(cluster_labels, cluster_labels, function(a, b) abs(a - b)))
    # For a "dist" object, labels live in the "Labels" attribute, not rownames.
    attr(dist_matrix, "Labels") <- all_vars

    hc <- hclust(dist_matrix, method = "complete")

    plot(hc,
         main = paste0("Hierarchical Clustering of Stochastic Dominance Sets \nSD Degree: ", degree),
         xlab = "Variables",
         ylab = "SD Distance",
         sub = ""
    )

    return(list("Clusters" = clusters, "Dendrogram" = hc))
  }

  list("Clusters" = clusters)
}
| 129 | + |
0 commit comments